# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Author: Sebastian Raschka
#
# License: BSD 3 clause

import numpy as np
import scipy.stats
from itertools import combinations


def mcnemar_table(y_target, y_model1, y_model2):
    """
    Compute a 2x2 contingency table for McNemar's test.

    Parameters
    -----------
    y_target : array-like, shape=[n_samples]
        True class labels as 1D NumPy array.

    y_model1 : array-like, shape=[n_samples]
        Predicted class labels from model 1 as 1D NumPy array.

    y_model2 : array-like, shape=[n_samples]
        Predicted class labels from model 2 as 1D NumPy array.

    Returns
    ----------
    tb : array-like, shape=[2, 2]
        2x2 contingency table with the following contents:
        a: tb[0, 0]: # of samples that both models predicted correctly
        b: tb[0, 1]: # of samples that model 1 got right and model 2 got wrong
        c: tb[1, 0]: # of samples that model 2 got right and model 1 got wrong
        d: tb[1, 1]: # of samples that both models predicted incorrectly

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar_table/

    """
    for ary in (y_target, y_model1, y_model2):
        if len(ary.shape) != 1:
            raise ValueError('One or more input arrays are '
                             'not 1-dimensional.')

    if y_target.shape[0] != y_model1.shape[0]:
        raise ValueError('y_target and y_model1 contain a different number'
                         ' of elements.')

    if y_target.shape[0] != y_model2.shape[0]:
        raise ValueError('y_target and y_model2 contain a different number'
                         ' of elements.')

    # per-sample correctness indicators (1 = correct, 0 = wrong)
    m1_vs_true = (y_target == y_model1).astype(int)
    m2_vs_true = (y_target == y_model2).astype(int)
    plus_true = m1_vs_true + m2_vs_true
    minus_true = m1_vs_true - m2_vs_true

    tb = np.zeros((2, 2), dtype=int)

    tb[0, 0] = np.sum(plus_true == 2)    # both models correct
    tb[0, 1] = np.sum(minus_true == 1)   # only model 1 correct
    tb[1, 0] = np.sum(minus_true == -1)  # only model 2 correct
    tb[1, 1] = np.sum(plus_true == 0)    # both models wrong

    return tb
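
# A minimal usage sketch for `mcnemar_table` (the input arrays below are
# made-up illustrations, not taken from the official docs at the URL above):
#
#   y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
#   y_m1 = np.array([0, 0, 0, 0, 1, 1, 0, 0])
#   y_m2 = np.array([0, 0, 1, 1, 1, 1, 1, 1])
#   mcnemar_table(y_true, y_m1, y_m2)
#   # array([[4, 2],
#   #        [2, 0]])
#
# Both models are correct on 4 samples, each model is uniquely correct on 2,
# and there is no sample that both models get wrong.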


def mcnemar_tables(y_target, *y_model_predictions):
    """
    Compute multiple 2x2 contingency tables for McNemar's test
    or Cochran's Q test.

    Parameters
    -----------
    y_target : array-like, shape=[n_samples]
        True class labels as 1D NumPy array.

    y_model_predictions : array-like, shape=[n_samples]
        Predicted class labels for a model.

    Returns
    ----------
    tables : dict
        Dictionary of NumPy arrays with shape=[2, 2]. Each dictionary
        key names the two models to be compared based on the order the
        models were passed as `*y_model_predictions`. The number of
        dictionary entries is equal to the number of pairwise combinations
        between the m models, i.e., "m choose 2."

        For example, the following target array (containing the true labels)
        and 3 models

        - y_true = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
        - y_mod0 = np.array([0, 1, 0, 0, 0, 1, 1, 0, 0, 0])
        - y_mod1 = np.array([0, 0, 1, 1, 0, 1, 1, 0, 0, 0])
        - y_mod2 = np.array([0, 1, 1, 1, 0, 1, 0, 0, 0, 0])

        would result in the following dictionary:

        {'model_0 vs model_1': array([[ 4.,  2.],
                                      [ 1.,  3.]]),
         'model_0 vs model_2': array([[ 3.,  3.],
                                      [ 0.,  4.]]),
         'model_1 vs model_2': array([[ 3.,  2.],
                                      [ 0.,  5.]])}

        Each array is structured in the following way:

        - tb[0, 0]: # of samples that both models predicted correctly
        - tb[0, 1]: # of samples that model a got right and model b got wrong
        - tb[1, 0]: # of samples that model b got right and model a got wrong
        - tb[1, 1]: # of samples that both models predicted incorrectly

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar_tables/

    """
    model_lens = set()
    y_model_predictions = list(y_model_predictions)
    for ary in ([y_target] + y_model_predictions):
        if len(ary.shape) != 1:
            raise ValueError('One or more input arrays are '
                             'not 1-dimensional.')
        model_lens.add(ary.shape[0])

    if len(model_lens) > 1:
        raise ValueError('Each prediction array must have the '
                         'same number of samples.')

    num_models = len(y_model_predictions)
    if num_models < 2:
        raise ValueError('Provide at least 2 model prediction arrays.')

    tables = {}
    for comb in combinations(range(num_models), 2):

        tb = np.zeros((2, 2))

        # per-sample correctness indicators for the two models in this pair
        model1_vs_true = (y_target ==
                          y_model_predictions[comb[0]]).astype(int)
        model2_vs_true = (y_target ==
                          y_model_predictions[comb[1]]).astype(int)

        plus_true = model1_vs_true + model2_vs_true
        minus_true = model1_vs_true - model2_vs_true

        tb[0, 0] = np.sum(plus_true == 2)    # both models correct
        tb[0, 1] = np.sum(minus_true == 1)   # only the first model correct
        tb[1, 0] = np.sum(minus_true == -1)  # only the second model correct
        tb[1, 1] = np.sum(plus_true == 0)    # both models wrong

        name_str = 'model_%s vs model_%s' % (comb[0], comb[1])
        tables[name_str] = tb

    return tables
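
# A minimal usage sketch for `mcnemar_tables`, reusing the example arrays
# from the docstring above:
#
#   tables = mcnemar_tables(y_true, y_mod0, y_mod1, y_mod2)
#   tables['model_0 vs model_1']
#   # array([[4., 2.],
#   #        [1., 3.]])
#
# With m = 3 models, the dictionary holds "3 choose 2" = 3 pairwise tables.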


def mcnemar(ary, corrected=True, exact=False):
    """
    McNemar's test for paired nominal data.

    Parameters
    -----------
    ary : array-like, shape=[2, 2]
        2 x 2 contingency table (as returned by evaluate.mcnemar_table),
        where
        a: ary[0, 0]: # of samples that both models predicted correctly
        b: ary[0, 1]: # of samples that model 1 got right and model 2 got wrong
        c: ary[1, 0]: # of samples that model 2 got right and model 1 got wrong
        d: ary[1, 1]: # of samples that both models predicted incorrectly

    corrected : bool (default: True)
        Uses Edwards' continuity correction for the chi-squared statistic
        if `True`.

    exact : bool (default: False)
        If `True`, uses an exact binomial test comparing b
        to a binomial distribution with n = b + c and p = 0.5.
        It is highly recommended to use `exact=True` for sample sizes < 25
        since the chi-squared statistic is not well-approximated by the
        chi-squared distribution in that case!

    Returns
    -----------
    chi2, p : float or None, float
        Returns the chi-squared value and the p-value;
        if `exact=True` (default: `False`), `chi2` is `None`.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar/

    """
    if not ary.shape == (2, 2):
        raise ValueError('Input array must be a 2x2 array.')

    b = ary[0, 1]
    c = ary[1, 0]
    n = b + c  # number of discordant pairs

    if not exact:
        if corrected:
            chi2 = (abs(b - c) - 1.0)**2 / float(n)
        else:
            chi2 = (b - c)**2 / float(n)
        p = scipy.stats.distributions.chi2.sf(chi2, 1)
    else:
        # two-sided exact binomial test on the discordant pairs;
        # no chi-squared statistic is computed in this case
        chi2 = None
        p = min(scipy.stats.binom.cdf(min(b, c), b + c, .5) * 2., 1.)
        # this is equivalent to the following code:
        #
        # p = 0
        # for i in range(max(b, c), b + c + 1):
        #     p += (scipy.special.binom(b + c, i) *
        #           0.5**i * (1 - 0.5)**((b + c) - i))
        # p = 2 * p

    return chi2, p
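

# A minimal end-to-end sketch, assuming a contingency table with made-up
# counts (b = 8, c = 2 discordant pairs); the printed p-values below are
# approximate:
if __name__ == '__main__':
    tb = np.array([[24, 8],
                   [2, 16]])

    # chi-squared variant with continuity correction:
    # chi2 = (|8 - 2| - 1)^2 / 10 = 2.5, p ~ 0.114
    chi2, p = mcnemar(tb, corrected=True)
    print('chi2: %s, p-value: %.3f' % (chi2, p))

    # exact binomial variant, recommended here since b + c = 10 < 25:
    # chi2 is None, p = 2 * P(X <= 2) for X ~ Binomial(10, 0.5) ~ 0.109
    chi2, p = mcnemar(tb, exact=True)
    print('chi2: %s, p-value: %.3f' % (chi2, p))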