178 lines
6.9 KiB
Python
178 lines
6.9 KiB
Python
import numpy as np
|
|
from functools import partial
|
|
from scipy import stats
|
|
|
|
|
|
def _bws_input_validation(x, y, alternative, method):
|
|
''' Input validation and standardization for bws test'''
|
|
x, y = np.atleast_1d(x, y)
|
|
if x.ndim > 1 or y.ndim > 1:
|
|
raise ValueError('`x` and `y` must be exactly one-dimensional.')
|
|
if np.isnan(x).any() or np.isnan(y).any():
|
|
raise ValueError('`x` and `y` must not contain NaNs.')
|
|
if np.size(x) == 0 or np.size(y) == 0:
|
|
raise ValueError('`x` and `y` must be of nonzero size.')
|
|
|
|
z = stats.rankdata(np.concatenate((x, y)))
|
|
x, y = z[:len(x)], z[len(x):]
|
|
|
|
alternatives = {'two-sided', 'less', 'greater'}
|
|
alternative = alternative.lower()
|
|
if alternative not in alternatives:
|
|
raise ValueError(f'`alternative` must be one of {alternatives}.')
|
|
|
|
method = stats.PermutationMethod() if method is None else method
|
|
if not isinstance(method, stats.PermutationMethod):
|
|
raise ValueError('`method` must be an instance of '
|
|
'`scipy.stats.PermutationMethod`')
|
|
|
|
return x, y, alternative, method
|
|
|
|
|
|
def _bws_statistic(x, y, alternative, axis):
|
|
'''Compute the BWS test statistic for two independent samples'''
|
|
# Public function currently does not accept `axis`, but `permutation_test`
|
|
# uses `axis` to make vectorized call.
|
|
|
|
Ri, Hj = np.sort(x, axis=axis), np.sort(y, axis=axis)
|
|
n, m = Ri.shape[axis], Hj.shape[axis]
|
|
i, j = np.arange(1, n+1), np.arange(1, m+1)
|
|
|
|
Bx_num = Ri - (m + n)/n * i
|
|
By_num = Hj - (m + n)/m * j
|
|
|
|
if alternative == 'two-sided':
|
|
Bx_num *= Bx_num
|
|
By_num *= By_num
|
|
else:
|
|
Bx_num *= np.abs(Bx_num)
|
|
By_num *= np.abs(By_num)
|
|
|
|
Bx_den = i/(n+1) * (1 - i/(n+1)) * m*(m+n)/n
|
|
By_den = j/(m+1) * (1 - j/(m+1)) * n*(m+n)/m
|
|
|
|
Bx = 1/n * np.sum(Bx_num/Bx_den, axis=axis)
|
|
By = 1/m * np.sum(By_num/By_den, axis=axis)
|
|
|
|
B = (Bx + By) / 2 if alternative == 'two-sided' else (Bx - By) / 2
|
|
|
|
return B
|
|
|
|
|
|
def bws_test(x, y, *, alternative="two-sided", method=None):
|
|
r'''Perform the Baumgartner-Weiss-Schindler test on two independent samples.
|
|
|
|
The Baumgartner-Weiss-Schindler (BWS) test is a nonparametric test of
|
|
the null hypothesis that the distribution underlying sample `x`
|
|
is the same as the distribution underlying sample `y`. Unlike
|
|
the Kolmogorov-Smirnov, Wilcoxon, and Cramer-Von Mises tests,
|
|
the BWS test weights the integral by the variance of the difference
|
|
in cumulative distribution functions (CDFs), emphasizing the tails of the
|
|
distributions, which increases the power of the test in many applications.
|
|
|
|
Parameters
|
|
----------
|
|
x, y : array-like
|
|
1-d arrays of samples.
|
|
alternative : {'two-sided', 'less', 'greater'}, optional
|
|
Defines the alternative hypothesis. Default is 'two-sided'.
|
|
Let *F(u)* and *G(u)* be the cumulative distribution functions of the
|
|
distributions underlying `x` and `y`, respectively. Then the following
|
|
alternative hypotheses are available:
|
|
|
|
* 'two-sided': the distributions are not equal, i.e. *F(u) ≠ G(u)* for
|
|
at least one *u*.
|
|
* 'less': the distribution underlying `x` is stochastically less than
|
|
the distribution underlying `y`, i.e. *F(u) >= G(u)* for all *u*.
|
|
* 'greater': the distribution underlying `x` is stochastically greater
|
|
than the distribution underlying `y`, i.e. *F(u) <= G(u)* for all
|
|
*u*.
|
|
|
|
Under a more restrictive set of assumptions, the alternative hypotheses
|
|
can be expressed in terms of the locations of the distributions;
|
|
see [2] section 5.1.
|
|
method : PermutationMethod, optional
|
|
Configures the method used to compute the p-value. The default is
|
|
the default `PermutationMethod` object.
|
|
|
|
Returns
|
|
-------
|
|
res : PermutationTestResult
|
|
An object with attributes:
|
|
|
|
statistic : float
|
|
The observed test statistic of the data.
|
|
pvalue : float
|
|
The p-value for the given alternative.
|
|
null_distribution : ndarray
|
|
The values of the test statistic generated under the null hypothesis.
|
|
|
|
See also
|
|
--------
|
|
scipy.stats.wilcoxon, scipy.stats.mannwhitneyu, scipy.stats.ttest_ind
|
|
|
|
Notes
|
|
-----
|
|
When ``alternative=='two-sided'``, the statistic is defined by the
|
|
equations given in [1]_ Section 2. This statistic is not appropriate for
|
|
one-sided alternatives; in that case, the statistic is the *negative* of
|
|
that given by the equations in [1]_ Section 2. Consequently, when the
|
|
distribution of the first sample is stochastically greater than that of the
|
|
second sample, the statistic will tend to be positive.
|
|
|
|
References
|
|
----------
|
|
.. [1] Neuhäuser, M. (2005). Exact Tests Based on the
|
|
Baumgartner-Weiss-Schindler Statistic: A Survey. Statistical Papers,
|
|
46(1), 1-29.
|
|
.. [2] Fay, M. P., & Proschan, M. A. (2010). Wilcoxon-Mann-Whitney or t-test?
|
|
On assumptions for hypothesis tests and multiple interpretations of
|
|
decision rules. Statistics surveys, 4, 1.
|
|
|
|
Examples
|
|
--------
|
|
We follow the example of table 3 in [1]_: Fourteen children were divided
|
|
randomly into two groups. Their ranks at performing a specific tests are
|
|
as follows.
|
|
|
|
>>> import numpy as np
|
|
>>> x = [1, 2, 3, 4, 6, 7, 8]
|
|
>>> y = [5, 9, 10, 11, 12, 13, 14]
|
|
|
|
We use the BWS test to assess whether there is a statistically significant
|
|
difference between the two groups.
|
|
The null hypothesis is that there is no difference in the distributions of
|
|
performance between the two groups. We decide that a significance level of
|
|
1% is required to reject the null hypothesis in favor of the alternative
|
|
that the distributions are different.
|
|
Since the number of samples is very small, we can compare the observed test
|
|
statistic against the *exact* distribution of the test statistic under the
|
|
null hypothesis.
|
|
|
|
>>> from scipy.stats import bws_test
|
|
>>> res = bws_test(x, y)
|
|
>>> print(res.statistic)
|
|
5.132167152575315
|
|
|
|
This agrees with :math:`B = 5.132` reported in [1]_. The *p*-value produced
|
|
by `bws_test` also agrees with :math:`p = 0.0029` reported in [1]_.
|
|
|
|
>>> print(res.pvalue)
|
|
0.002913752913752914
|
|
|
|
Because the p-value is below our threshold of 1%, we take this as evidence
|
|
against the null hypothesis in favor of the alternative that there is a
|
|
difference in performance between the two groups.
|
|
'''
|
|
|
|
x, y, alternative, method = _bws_input_validation(x, y, alternative,
|
|
method)
|
|
bws_statistic = partial(_bws_statistic, alternative=alternative)
|
|
|
|
permutation_alternative = 'less' if alternative == 'less' else 'greater'
|
|
res = stats.permutation_test((x, y), bws_statistic,
|
|
alternative=permutation_alternative,
|
|
**method._asdict())
|
|
|
|
return res
|