405 lines
17 KiB
Python
405 lines
17 KiB
Python
import copy
|
|
|
|
import numpy as np
|
|
import pytest
|
|
from numpy.testing import assert_allclose
|
|
|
|
from scipy import stats
|
|
from scipy.stats._multicomp import _pvalue_dunnett, DunnettResult
|
|
|
|
|
|
class TestDunnett:
    """Tests for `scipy.stats.dunnett` and the `_pvalue_dunnett` helper.

    The class attributes below hold four reference data sets. Each
    ``case_N`` dictionary bundles the treatment samples, the control
    sample, the expected statistics, and the expected p-values and
    confidence-interval bounds for the three alternatives (keyed
    ``'twosided'``, ``'less'`` and ``'greater'``).
    """

    # For the following tests, p-values were computed using Matlab, e.g.
    #     sample = [18. 15. 18. 16. 17. 15. 14. 14. 14. 15. 15....
    #               14. 15. 14. 22. 18. 21. 21. 10. 10. 11. 9....
    #               25. 26. 17.5 16. 15.5 14.5 22. 22. 24. 22.5 29....
    #               24.5 20. 18. 18.5 17.5 26.5 13. 16.5 13. 13. 13....
    #               28. 27. 34. 31. 29. 27. 24. 23. 38. 36. 25....
    #               38. 26. 22. 36. 27. 27. 32. 28. 31....
    #               24. 27. 33. 32. 28. 19. 37. 31. 36. 36....
    #               34. 38. 32. 38. 32....
    #               26. 24. 26. 25. 29. 29.5 16.5 36. 44....
    #               25. 27. 19....
    #               25. 20....
    #               28.];
    #     j = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
    #          0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
    #          0 0 0 0...
    #          1 1 1 1 1 1 1 1 1 1 1 1 1 1 1...
    #          2 2 2 2 2 2 2 2 2...
    #          3 3 3...
    #          4 4...
    #          5];
    #     [~, ~, stats] = anova1(sample, j, "off");
    #     [results, ~, ~, gnames] = multcompare(stats, ...
    #         "CriticalValueType", "dunnett", ...
    #         "Approximate", false);
    #     tbl = array2table(results, "VariableNames", ...
    #         ["Group", "Control Group", "Lower Limit", ...
    #          "Difference", "Upper Limit", "P-value"]);
    #     tbl.("Group") = gnames(tbl.("Group"));
    #     tbl.("Control Group") = gnames(tbl.("Control Group"))

    # Matlab doesn't report the statistic, so the statistics were
    # computed using R multcomp `glht`, e.g.:
    #     library(multcomp)
    #     options(digits=16)
    #     control <- c(18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0,
    #                  15.0, 15.0, 14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0,
    #                  10.0, 10.0, 11.0, 9.0, 25.0, 26.0, 17.5, 16.0, 15.5,
    #                  14.5, 22.0, 22.0, 24.0, 22.5, 29.0, 24.5, 20.0, 18.0,
    #                  18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0, 28.0,
    #                  27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0,
    #                  25.0, 38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0,
    #                  31.0)
    #     t <- c(24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
    #            34.0, 38.0, 32.0, 38.0, 32.0)
    #     w <- c(26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0)
    #     x <- c(25.0, 27.0, 19.0)
    #     y <- c(25.0, 20.0)
    #     z <- c(28.0)
    #
    #     groups = factor(rep(c("control", "t", "w", "x", "y", "z"),
    #                         times=c(length(control), length(t), length(w),
    #                                 length(x), length(y), length(z))))
    #     df <- data.frame(response=c(control, t, w, x, y, z),
    #                      group=groups)
    #     model <- aov(response ~ group, data = df)
    #     test <- glht(model=model,
    #                  linfct=mcp(group="Dunnett"),
    #                  alternative="g")
    #     summary(test)
    #     confint(test)
    # p-values agreed with those produced by Matlab to at least atol=1e-3

    # From Matlab's documentation on multcompare
    samples_1 = [
        [
            24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
            34.0, 38.0, 32.0, 38.0, 32.0
        ],
        [26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0],
        [25.0, 27.0, 19.0],
        [25.0, 20.0],
        [28.0]
    ]
    control_1 = [
        18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0, 15.0, 15.0,
        14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0, 10.0, 10.0, 11.0, 9.0,
        25.0, 26.0, 17.5, 16.0, 15.5, 14.5, 22.0, 22.0, 24.0, 22.5, 29.0,
        24.5, 20.0, 18.0, 18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0,
        28.0, 27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0, 25.0,
        38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0, 31.0
    ]
    pvalue_1 = [4.727e-06, 0.022346, 0.97912, 0.99953, 0.86579]  # Matlab
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_1_twosided = [1e-4, 0.02237, 0.97913, 0.99953, 0.86583]
    p_1_greater = [1e-4, 0.011217, 0.768500, 0.896991, 0.577211]
    p_1_less = [1, 1, 0.99660, 0.98398, .99953]
    statistic_1 = [5.27356, 2.91270, 0.60831, 0.27002, 0.96637]
    ci_1_twosided = [[5.3633917835622, 0.7296142201217, -8.3879817106607,
                      -11.9090753452911, -11.7655021543469],
                     [15.9709832164378, 13.8936496687672, 13.4556900439941,
                      14.6434503452911, 25.4998771543469]]
    ci_1_greater = [5.9036402398526, 1.4000632918725, -7.2754756323636,
                    -10.5567456382391, -9.8675629499576]
    ci_1_less = [15.4306165948619, 13.2230539537359, 12.3429406339544,
                 13.2908248513211, 23.6015228251660]
    pvalues_1 = dict(twosided=p_1_twosided, less=p_1_less, greater=p_1_greater)
    cis_1 = dict(twosided=ci_1_twosided, less=ci_1_less, greater=ci_1_greater)
    case_1 = dict(samples=samples_1, control=control_1, statistic=statistic_1,
                  pvalues=pvalues_1, cis=cis_1)

    # From Dunnett1955 comparing with R's DescTools: DunnettTest
    samples_2 = [[9.76, 8.80, 7.68, 9.36], [12.80, 9.68, 12.16, 9.20, 10.55]]
    control_2 = [7.40, 8.50, 7.20, 8.24, 9.84, 8.32]
    pvalue_2 = [0.6201, 0.0058]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_2_twosided = [0.6201020, 0.0058254]
    p_2_greater = [0.3249776, 0.0029139]
    p_2_less = [0.91676, 0.99984]
    statistic_2 = [0.85703, 3.69375]
    ci_2_twosided = [[-1.2564116462124, 0.8396273539789],
                     [2.5564116462124, 4.4163726460211]]
    ci_2_greater = [-0.9588591188156, 1.1187563667543]
    ci_2_less = [2.2588591188156, 4.1372436332457]
    pvalues_2 = dict(twosided=p_2_twosided, less=p_2_less, greater=p_2_greater)
    cis_2 = dict(twosided=ci_2_twosided, less=ci_2_less, greater=ci_2_greater)
    case_2 = dict(samples=samples_2, control=control_2, statistic=statistic_2,
                  pvalues=pvalues_2, cis=cis_2)

    samples_3 = [[55, 64, 64], [55, 49, 52], [50, 44, 41]]
    control_3 = [55, 47, 48]
    pvalue_3 = [0.0364, 0.8966, 0.4091]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_3_twosided = [0.036407, 0.896539, 0.409295]
    p_3_greater = [0.018277, 0.521109, 0.981892]
    p_3_less = [0.99944, 0.90054, 0.20974]
    statistic_3 = [3.09073, 0.56195, -1.40488]
    ci_3_twosided = [[0.7529028025053, -8.2470971974947, -15.2470971974947],
                     [21.2470971974947, 12.2470971974947, 5.2470971974947]]
    ci_3_greater = [2.4023682323149, -6.5976317676851, -13.5976317676851]
    ci_3_less = [19.5984402363662, 10.5984402363662, 3.5984402363662]
    pvalues_3 = dict(twosided=p_3_twosided, less=p_3_less, greater=p_3_greater)
    cis_3 = dict(twosided=ci_3_twosided, less=ci_3_less, greater=ci_3_greater)
    case_3 = dict(samples=samples_3, control=control_3, statistic=statistic_3,
                  pvalues=pvalues_3, cis=cis_3)

    # From Thomson and Short,
    # Mucociliary function in health, chronic obstructive airway disease,
    # and asbestosis, Journal of Applied Physiology, 1969. Table 1
    # Comparing with R's DescTools: DunnettTest
    samples_4 = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
    control_4 = [2.9, 3.0, 2.5, 2.6, 3.2]
    pvalue_4 = [0.5832, 0.9982]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_4_twosided = [0.58317, 0.99819]
    p_4_greater = [0.30225, 0.69115]
    p_4_less = [0.91929, 0.65212]
    statistic_4 = [0.90875, -0.05007]
    ci_4_twosided = [[-0.6898153448579, -1.0333456251632],
                     [1.4598153448579, 0.9933456251632]]
    ci_4_greater = [-0.5186459268412, -0.8719655502147]
    ci_4_less = [1.2886459268412, 0.8319655502147]
    pvalues_4 = dict(twosided=p_4_twosided, less=p_4_less, greater=p_4_greater)
    cis_4 = dict(twosided=ci_4_twosided, less=ci_4_less, greater=ci_4_greater)
    case_4 = dict(samples=samples_4, control=control_4, statistic=statistic_4,
                  pvalues=pvalues_4, cis=cis_4)

    @pytest.mark.parametrize(
        'rho, n_groups, df, statistic, pvalue, alternative',
        [
            # From Dunnett1955
            # Tables 1a and 1b pages 1117-1118
            (0.5, 1, 10, 1.81, 0.05, "greater"),  # different than two-sided
            (0.5, 3, 10, 2.34, 0.05, "greater"),
            (0.5, 2, 30, 1.99, 0.05, "greater"),
            (0.5, 5, 30, 2.33, 0.05, "greater"),
            (0.5, 4, 12, 3.32, 0.01, "greater"),
            (0.5, 7, 12, 3.56, 0.01, "greater"),
            (0.5, 2, 60, 2.64, 0.01, "greater"),
            (0.5, 4, 60, 2.87, 0.01, "greater"),
            (0.5, 4, 60, [2.87, 2.21], [0.01, 0.05], "greater"),
            # Tables 2a and 2b pages 1119-1120
            (0.5, 1, 10, 2.23, 0.05, "two-sided"),  # two-sided
            (0.5, 3, 10, 2.81, 0.05, "two-sided"),
            (0.5, 2, 30, 2.32, 0.05, "two-sided"),
            (0.5, 3, 20, 2.57, 0.05, "two-sided"),
            (0.5, 4, 12, 3.76, 0.01, "two-sided"),
            (0.5, 7, 12, 4.08, 0.01, "two-sided"),
            (0.5, 2, 60, 2.90, 0.01, "two-sided"),
            (0.5, 4, 60, 3.14, 0.01, "two-sided"),
            (0.5, 4, 60, [3.14, 2.55], [0.01, 0.05], "two-sided"),
        ],
    )
    def test_critical_values(
        self, rho, n_groups, df, statistic, pvalue, alternative
    ):
        """Check `_pvalue_dunnett` against tabulated critical values.

        A constant-correlation matrix (off-diagonal ``rho``, unit diagonal)
        of size ``n_groups`` is built, and the p-value(s) returned for the
        tabulated ``statistic`` must match ``pvalue`` to ``atol=5e-3``.
        """
        rng = np.random.default_rng(165250594791731684851746311027739134893)
        rho = np.full((n_groups, n_groups), rho)
        np.fill_diagonal(rho, 1)

        statistic = np.array(statistic)
        res = _pvalue_dunnett(
            rho=rho, df=df, statistic=statistic,
            alternative=alternative,
            rng=rng
        )
        assert_allclose(res, pvalue, atol=5e-3)

    @pytest.mark.parametrize(
        'samples, control, pvalue, statistic',
        [
            (samples_1, control_1, pvalue_1, statistic_1),
            (samples_2, control_2, pvalue_2, statistic_2),
            (samples_3, control_3, pvalue_3, statistic_3),
            (samples_4, control_4, pvalue_4, statistic_4),
        ]
    )
    def test_basic(self, samples, control, pvalue, statistic):
        """Check result type, statistics and default-alternative p-values."""
        rng = np.random.default_rng(11681140010308601919115036826969764808)

        res = stats.dunnett(*samples, control=control, random_state=rng)

        assert isinstance(res, DunnettResult)
        assert_allclose(res.statistic, statistic, rtol=5e-5)
        assert_allclose(res.pvalue, pvalue, rtol=1e-2, atol=1e-4)

    @pytest.mark.parametrize(
        'alternative',
        ['two-sided', 'less', 'greater']
    )
    def test_ttest_ind(self, alternative):
        """Check that `dunnett` agrees with `ttest_ind` for two groups."""
        rng = np.random.default_rng(114184017807316971636137493526995620351)

        for _ in range(10):
            sample = rng.integers(-100, 100, size=(10,))
            control = rng.integers(-100, 100, size=(10,))

            res = stats.dunnett(
                sample, control=control,
                alternative=alternative, random_state=rng
            )
            # No `random_state` here: `ttest_ind` only consumes it for
            # permutation tests, which are not requested, and passing it
            # is deprecated in recent SciPy releases.
            ref = stats.ttest_ind(
                sample, control,
                alternative=alternative
            )

            assert_allclose(res.statistic, ref.statistic, rtol=1e-3, atol=1e-5)
            assert_allclose(res.pvalue, ref.pvalue, rtol=1e-3, atol=1e-5)

    @pytest.mark.parametrize(
        'alternative, pvalue',
        [
            ('less', [0, 1]),
            ('greater', [1, 0]),
            ('two-sided', [0, 0]),
        ]
    )
    def test_alternatives(self, alternative, pvalue):
        """Check p-values and CI bounds on clearly separated samples.

        One sample lies well below the control and one well above it, so
        the expected p-values are 0 or 1 depending on ``alternative``.
        """
        rng = np.random.default_rng(114184017807316971636137493526995620351)

        # width of 20 and min diff between samples/control is 60
        # and maximal diff would be 100
        sample_less = rng.integers(0, 20, size=(10,))
        control = rng.integers(80, 100, size=(10,))
        sample_greater = rng.integers(160, 180, size=(10,))

        res = stats.dunnett(
            sample_less, sample_greater, control=control,
            alternative=alternative, random_state=rng
        )
        assert_allclose(res.pvalue, pvalue, atol=1e-7)

        ci = res.confidence_interval()
        # two-sided is comparable for high/low
        if alternative == 'less':
            assert np.isneginf(ci.low).all()
            assert -100 < ci.high[0] < -60
            assert 60 < ci.high[1] < 100
        elif alternative == 'greater':
            assert -100 < ci.low[0] < -60
            assert 60 < ci.low[1] < 100
            assert np.isposinf(ci.high).all()
        elif alternative == 'two-sided':
            assert -100 < ci.low[0] < -60
            assert 60 < ci.low[1] < 100
            assert -100 < ci.high[0] < -60
            assert 60 < ci.high[1] < 100

    @pytest.mark.parametrize("case", [case_1, case_2, case_3, case_4])
    @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided'])
    def test_against_R_multicomp_glht(self, case, alternative):
        """Compare p-values and 95% CIs with the stored R `glht` references.

        Also checks that the confidence interval is stored on the result
        after the first call and returned as the same object on a second
        call with the same confidence level.
        """
        rng = np.random.default_rng(189117774084579816190295271136455278291)
        samples = case['samples']
        control = case['control']
        # The reference dictionaries are keyed without the hyphen.
        alternatives = {'less': 'less', 'greater': 'greater',
                        'two-sided': 'twosided'}
        ref_key = alternatives[alternative]
        p_ref = case['pvalues'][ref_key]

        res = stats.dunnett(*samples, control=control, alternative=alternative,
                            random_state=rng)
        # atol can't be tighter because R reports some pvalues as "< 1e-4"
        assert_allclose(res.pvalue, p_ref, rtol=5e-3, atol=1e-4)

        ci_ref = case['cis'][ref_key]
        # One-sided references store only the finite bound; the other
        # bound is infinite.
        if alternative == "greater":
            ci_ref = [ci_ref, np.inf]
        elif alternative == "less":
            ci_ref = [-np.inf, ci_ref]
        assert res._ci is None
        assert res._ci_cl is None
        ci = res.confidence_interval(confidence_level=0.95)
        assert_allclose(ci.low, ci_ref[0], rtol=5e-3, atol=1e-5)
        assert_allclose(ci.high, ci_ref[1], rtol=5e-3, atol=1e-5)

        # A second call with the same level must return the cached
        # object; `is` checks identity, not just equality.
        assert res._ci is ci
        assert res._ci_cl == 0.95
        ci_ = res.confidence_interval(confidence_level=0.95)
        assert ci_ is ci

    @pytest.mark.parametrize('alternative', ["two-sided", "less", "greater"])
    def test_str(self, alternative):
        """Check selected substrings of ``str(result)`` for case 3."""
        rng = np.random.default_rng(189117774084579816190295271136455278291)

        res = stats.dunnett(
            *self.samples_3, control=self.control_3, alternative=alternative,
            random_state=rng
        )

        # check some str output
        res_str = str(res)
        assert '(Sample 2 - Control)' in res_str
        assert '95.0%' in res_str

        if alternative == 'less':
            assert '-inf' in res_str
            assert '19.' in res_str
        elif alternative == 'greater':
            assert 'inf' in res_str
            assert '-13.' in res_str
        else:
            assert 'inf' not in res_str
            assert '21.' in res_str

    def test_warnings(self):
        """Check that `_allowance(tol=1e-5)` emits the convergence warning."""
        rng = np.random.default_rng(189117774084579816190295271136455278291)

        res = stats.dunnett(
            *self.samples_3, control=self.control_3, random_state=rng
        )
        msg = r"Computation of the confidence interval did not converge"
        with pytest.warns(UserWarning, match=msg):
            res._allowance(tol=1e-5)

    def test_raises(self):
        """Check the `ValueError` messages raised for invalid input."""
        samples, control = self.samples_3, self.control_3

        # alternative
        with pytest.raises(ValueError, match="alternative must be"):
            stats.dunnett(*samples, control=control, alternative='bob')

        # 2D for a sample
        samples_ = copy.deepcopy(samples)
        samples_[0] = [samples_[0]]
        with pytest.raises(ValueError, match="must be 1D arrays"):
            stats.dunnett(*samples_, control=control)

        # 2D for control
        control_ = copy.deepcopy(control)
        control_ = [control_]
        with pytest.raises(ValueError, match="must be 1D arrays"):
            stats.dunnett(*samples, control=control_)

        # No obs in a sample
        samples_ = copy.deepcopy(samples)
        samples_[1] = []
        with pytest.raises(ValueError, match="at least 1 observation"):
            stats.dunnett(*samples_, control=control)

        # No obs in control
        control_ = []
        with pytest.raises(ValueError, match="at least 1 observation"):
            stats.dunnett(*samples, control=control_)

        # Out-of-range confidence level
        res = stats.dunnett(*samples, control=control)
        with pytest.raises(ValueError, match="Confidence level must"):
            res.confidence_interval(confidence_level=3)

    @pytest.mark.filterwarnings("ignore:Computation of the confidence")
    @pytest.mark.parametrize('n_samples', [1, 2, 3])
    def test_shapes(self, n_samples):
        """Check that all outputs have shape ``(n_samples,)``."""
        rng = np.random.default_rng(689448934110805334)
        samples = rng.normal(size=(n_samples, 10))
        control = rng.normal(size=10)
        res = stats.dunnett(*samples, control=control, random_state=rng)
        assert res.statistic.shape == (n_samples,)
        assert res.pvalue.shape == (n_samples,)
        ci = res.confidence_interval()
        assert ci.low.shape == (n_samples,)
        assert ci.high.shape == (n_samples,)
|