import pytest
import numpy as np
from numpy.testing import assert_equal, assert_allclose

from .._discrete_distns import nchypergeom_fisher, hypergeom
from scipy.stats._odds_ratio import odds_ratio
from .data.fisher_exact_results_from_r import data


class TestOddsRatio:

    @pytest.mark.parametrize('parameters, rresult', data)
    def test_results_from_r(self, parameters, rresult):
        alternative = parameters.alternative.replace('.', '-')
        result = odds_ratio(parameters.table)
        # The results computed by R are not very accurate.
        if result.statistic < 400:
            or_rtol = 5e-4
            ci_rtol = 2e-2
        else:
            or_rtol = 5e-2
            ci_rtol = 1e-1
        assert_allclose(result.statistic,
                        rresult.conditional_odds_ratio, rtol=or_rtol)
        ci = result.confidence_interval(parameters.confidence_level,
                                        alternative)
        assert_allclose((ci.low, ci.high),
                        rresult.conditional_odds_ratio_ci, rtol=ci_rtol)

        # Also do a self-check for the conditional odds ratio.
        # With the computed conditional odds ratio as the noncentrality
        # parameter of the noncentral hypergeometric distribution with
        # parameters table.sum(), table[0].sum(), and table[:, 0].sum() as
        # total, ngood and nsample, respectively, the mean of the
        # distribution should equal table[0, 0].
        cor = result.statistic
        table = np.array(parameters.table)
        total = table.sum()
        ngood = table[0].sum()
        nsample = table[:, 0].sum()
        # nchypergeom_fisher does not allow the edge cases where the
        # noncentrality parameter is 0 or inf, so handle those values
        # separately here.
        if cor == 0:
            nchg_mean = hypergeom.support(total, ngood, nsample)[0]
        elif cor == np.inf:
            nchg_mean = hypergeom.support(total, ngood, nsample)[1]
        else:
            nchg_mean = nchypergeom_fisher.mean(total, ngood, nsample, cor)
        assert_allclose(nchg_mean, table[0, 0], rtol=1e-13)

        # Check that the confidence interval is correct.
        alpha = 1 - parameters.confidence_level
        if alternative == 'two-sided':
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha/2, rtol=1e-11)
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha/2, rtol=1e-11)
        elif alternative == 'less':
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha, rtol=1e-11)
        else:
            # alternative == 'greater'
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha, rtol=1e-11)

    @pytest.mark.parametrize('table', [
        [[0, 0], [5, 10]],
        [[5, 10], [0, 0]],
        [[0, 5], [0, 10]],
        [[5, 0], [10, 0]],
    ])
    def test_row_or_col_zero(self, table):
        result = odds_ratio(table)
        assert_equal(result.statistic, np.nan)
        ci = result.confidence_interval()
        assert_equal((ci.low, ci.high), (0, np.inf))

    @pytest.mark.parametrize("case",
                             [[0.95, 'two-sided', 0.4879913, 2.635883],
                              [0.90, 'two-sided', 0.5588516, 2.301663]])
    def test_sample_odds_ratio_ci(self, case):
        # Compare the sample odds ratio confidence interval to the R function
        # oddsratio.wald from the epitools package, e.g.
        # > library(epitools)
        # > table = matrix(c(10, 20, 41, 93), nrow=2, ncol=2, byrow=TRUE)
        # > result = oddsratio.wald(table)
        # > result$measure
        #           odds ratio with 95% C.I.
        #   Predictor  estimate     lower    upper
        #    Exposed1  1.000000        NA       NA
        #    Exposed2  1.134146 0.4879913 2.635883

        confidence_level, alternative, ref_low, ref_high = case
        table = [[10, 20], [41, 93]]
        result = odds_ratio(table, kind='sample')
        assert_allclose(result.statistic, 1.134146, rtol=1e-6)
        ci = result.confidence_interval(confidence_level, alternative)
        assert_allclose([ci.low, ci.high], [ref_low, ref_high], rtol=1e-6)

    @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
    def test_sample_odds_ratio_one_sided_ci(self, alternative):
        # Can't find a good reference for the one-sided CI, so bump up the
        # sample size and compare against the conditional odds ratio CI.
        table = [[1000, 2000], [4100, 9300]]
        res = odds_ratio(table, kind='sample')
        ref = odds_ratio(table, kind='conditional')
        assert_allclose(res.statistic, ref.statistic, atol=1e-5)
        assert_allclose(res.confidence_interval(alternative=alternative),
                        ref.confidence_interval(alternative=alternative),
                        atol=2e-3)

    @pytest.mark.parametrize('kind', ['sample', 'conditional'])
    @pytest.mark.parametrize('bad_table', [123, "foo", [10, 11, 12]])
    def test_invalid_table_shape(self, kind, bad_table):
        with pytest.raises(ValueError, match="Invalid shape"):
            odds_ratio(bad_table, kind=kind)

    def test_invalid_table_type(self):
        with pytest.raises(ValueError, match='must be an array of integers'):
            odds_ratio([[1.0, 3.4], [5.0, 9.9]])

    def test_negative_table_values(self):
        with pytest.raises(ValueError, match='must be nonnegative'):
            odds_ratio([[1, 2], [3, -4]])

    def test_invalid_kind(self):
        with pytest.raises(ValueError, match='`kind` must be'):
            odds_ratio([[10, 20], [30, 14]], kind='magnetoreluctance')

    def test_invalid_alternative(self):
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='`alternative` must be'):
            result.confidence_interval(alternative='depleneration')

    @pytest.mark.parametrize('level', [-0.5, 1.5])
    def test_invalid_confidence_level(self, level):
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='must be between 0 and 1'):
            result.confidence_interval(confidence_level=level)
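

# Illustrative sketch: a standalone restatement, on a single hand-picked
# table, of the consistency properties that
# TestOddsRatio.test_results_from_r verifies for every table in the R
# reference data.  The table values and the (looser) tolerances here are
# arbitrary choices made for illustration, not reference values.
def test_conditional_odds_ratio_consistency_example():
    table = np.array([[7, 17], [8, 28]])
    result = odds_ratio(table)  # kind='conditional' is the default
    total = table.sum()
    ngood = table[0].sum()
    nsample = table[:, 0].sum()

    # With the conditional odds ratio as the noncentrality parameter, the
    # mean of Fisher's noncentral hypergeometric distribution should
    # recover the observed count table[0, 0].
    nchg_mean = nchypergeom_fisher.mean(total, ngood, nsample,
                                        result.statistic)
    assert_allclose(nchg_mean, table[0, 0], rtol=1e-10)

    # Each two-sided 95% confidence limit should put probability
    # alpha/2 = 0.025 in the corresponding tail of the distribution.
    ci = result.confidence_interval(confidence_level=0.95)
    sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                               total, ngood, nsample, ci.low)
    cdf = nchypergeom_fisher.cdf(table[0, 0],
                                 total, ngood, nsample, ci.high)
    assert_allclose((sf, cdf), (0.025, 0.025), rtol=1e-9)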