import threading
import pickle
import pytest
from copy import deepcopy
import platform
import sys
import math
import numpy as np
from numpy.testing import assert_allclose, assert_equal, suppress_warnings
from numpy.lib import NumpyVersion
from scipy.stats.sampling import (
    TransformedDensityRejection,
    DiscreteAliasUrn,
    DiscreteGuideTable,
    NumericalInversePolynomial,
    NumericalInverseHermite,
    SimpleRatioUniforms,
    UNURANError
)
from scipy import stats
from scipy import special
from scipy.stats import chisquare, cramervonmises
from scipy.stats._distr_params import distdiscrete, distcont
from scipy._lib._util import check_random_state


# common test data: this data can be shared between all the tests.


# Normal distribution shared between all the continuous methods
class StandardNormal:
    """Standard normal distribution exposing `pdf`, `dpdf` and `cdf`."""

    def pdf(self, x):
        """Return the normalized standard normal density at `x`."""
        # normalization constant needed for NumericalInverseHermite
        return 1./np.sqrt(2.*np.pi) * np.exp(-0.5 * x*x)

    def dpdf(self, x):
        """Return the derivative of `pdf` with respect to `x`."""
        return 1./np.sqrt(2.*np.pi) * -x * np.exp(-0.5 * x*x)

    def cdf(self, x):
        """Return the standard normal CDF at `x` via `special.ndtr`."""
        return special.ndtr(x)


# (generator class name, constructor keyword arguments) pairs used to
# parametrize the tests that must hold for every method.
all_methods = [
    ("TransformedDensityRejection", {"dist": StandardNormal()}),
    ("DiscreteAliasUrn", {"dist": [0.02, 0.18, 0.8]}),
    ("DiscreteGuideTable", {"dist": [0.02, 0.18, 0.8]}),
    ("NumericalInversePolynomial", {"dist": StandardNormal()}),
    ("NumericalInverseHermite", {"dist": StandardNormal()}),
    ("SimpleRatioUniforms", {"dist": StandardNormal(), "mode": 0})
]

# Regex for the TypeError message expected when a callback returns a list
# where a float is required; the wording depends on the interpreter.
if (sys.implementation.name == 'pypy'
        and sys.implementation.version < (7, 3, 10)):
    # changed in PyPy for v7.3.10
    floaterr = r"unsupported operand type for float\(\): 'list'"
else:
    floaterr = r"must be real number, not list"

# Make sure an internal error occurs in UNU.RAN when invalid callbacks are
# passed. Moreover, different generators throw different error messages.
# So, in case of an `UNURANError`, we do not validate the error message.
# Each entry is (callback, expected exception type, regex for the message).
bad_pdfs_common = [
    # Negative PDF
    (lambda x: -x, UNURANError, r"..."),
    # Returning wrong type
    (lambda x: [], TypeError, floaterr),
    # Undefined name inside the function
    (lambda x: foo, NameError, r"name 'foo' is not defined"),  # type: ignore[name-defined]  # noqa
    # Infinite value returned => Overflow error.
    (lambda x: np.inf, UNURANError, r"..."),
    # NaN value => internal error in UNU.RAN
    (lambda x: np.nan, UNURANError, r"..."),
    # signature of PDF wrong
    (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
]


# same approach for dpdf
bad_dpdf_common = [
    # Infinite value returned.
    (lambda x: np.inf, UNURANError, r"..."),
    # NaN value => internal error in UNU.RAN
    (lambda x: np.nan, UNURANError, r"..."),
    # Returning wrong type
    (lambda x: [], TypeError, floaterr),
    # Undefined name inside the function
    (lambda x: foo, NameError, r"name 'foo' is not defined"),  # type: ignore[name-defined]  # noqa
    # signature of dPDF wrong
    (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
]


# same approach for logpdf
bad_logpdfs_common = [
    # Returning wrong type
    (lambda x: [], TypeError, floaterr),
    # Undefined name inside the function
    (lambda x: foo, NameError, r"name 'foo' is not defined"),  # type: ignore[name-defined]  # noqa
    # Infinite value returned => Overflow error.
    (lambda x: np.inf, UNURANError, r"..."),
    # NaN value => internal error in UNU.RAN
    (lambda x: np.nan, UNURANError, r"..."),
    # signature of logpdf wrong
    (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
]


# Each entry is (probability vector, regex for the error message).
bad_pv_common = [
    ([], r"must contain at least one element"),
    ([[1.0, 0.0]], r"wrong number of dimensions \(expected 1, got 2\)"),
    ([0.2, 0.4, np.nan, 0.8], r"must contain only finite / non-nan values"),
    ([0.2, 0.4, np.inf, 0.8], r"must contain only finite / non-nan values"),
    ([0.0, 0.0], r"must contain at least one non-zero value"),
]


# size of the domains is incorrect
bad_sized_domains = [
    # > 2 elements in the domain
    ((1, 2, 3), ValueError, r"must be a length 2 tuple"),
    # empty domain
    ((), ValueError, r"must be a length 2 tuple")
]

# domain values are incorrect
bad_domains = [
    ((2, 1), UNURANError, r"left >= right"),
    ((1, 1), UNURANError, r"left >= right"),
]

# infinite and nan values present in domain.
inf_nan_domains = [
    # left >= right
    ((10, 10), UNURANError, r"left >= right"),
    ((np.inf, np.inf), UNURANError, r"left >= right"),
    ((-np.inf, -np.inf), UNURANError, r"left >= right"),
    ((np.inf, -np.inf), UNURANError, r"left >= right"),
    # Also include nans in some of the domains.
    ((-np.inf, np.nan), ValueError, r"only non-nan values"),
    ((np.nan, np.inf), ValueError, r"only non-nan values")
]

# `nan` values present in domain. Some distributions don't support
# infinite tails, so don't mix the nan values with infinities.
nan_domains = [
    ((0, np.nan), ValueError, r"only non-nan values"),
    ((np.nan, np.nan), ValueError, r"only non-nan values")
]


# all the methods should throw errors for nan, bad sized, and bad valued
# domains.
@pytest.mark.parametrize("domain, err, msg", bad_domains + bad_sized_domains + nan_domains) # type: ignore[operator] @pytest.mark.parametrize("method, kwargs", all_methods) def test_bad_domain(domain, err, msg, method, kwargs): Method = getattr(stats.sampling, method) with pytest.raises(err, match=msg): Method(**kwargs, domain=domain) @pytest.mark.parametrize("method, kwargs", all_methods) def test_random_state(method, kwargs): Method = getattr(stats.sampling, method) # simple seed that works for any version of NumPy seed = 123 rng1 = Method(**kwargs, random_state=seed) rng2 = Method(**kwargs, random_state=seed) assert_equal(rng1.rvs(100), rng2.rvs(100)) # global seed np.random.seed(123) rng1 = Method(**kwargs) rvs1 = rng1.rvs(100) np.random.seed(None) rng2 = Method(**kwargs, random_state=123) rvs2 = rng2.rvs(100) assert_equal(rvs1, rvs2) # Generator seed for new NumPy # when a RandomState is given, it should take the bitgen_t # member of the class and create a Generator instance. seed1 = np.random.RandomState(np.random.MT19937(123)) seed2 = np.random.Generator(np.random.MT19937(123)) rng1 = Method(**kwargs, random_state=seed1) rng2 = Method(**kwargs, random_state=seed2) assert_equal(rng1.rvs(100), rng2.rvs(100)) def test_set_random_state(): rng1 = TransformedDensityRejection(StandardNormal(), random_state=123) rng2 = TransformedDensityRejection(StandardNormal()) rng2.set_random_state(123) assert_equal(rng1.rvs(100), rng2.rvs(100)) rng = TransformedDensityRejection(StandardNormal(), random_state=123) rvs1 = rng.rvs(100) rng.set_random_state(123) rvs2 = rng.rvs(100) assert_equal(rvs1, rvs2) def test_threading_behaviour(): # Test if the API is thread-safe. # This verifies if the lock mechanism and the use of `PyErr_Occurred` # is correct. 
errors = {"err1": None, "err2": None} class Distribution: def __init__(self, pdf_msg): self.pdf_msg = pdf_msg def pdf(self, x): if 49.9 < x < 50.0: raise ValueError(self.pdf_msg) return x def dpdf(self, x): return 1 def func1(): dist = Distribution('foo') rng = TransformedDensityRejection(dist, domain=(10, 100), random_state=12) try: rng.rvs(100000) except ValueError as e: errors['err1'] = e.args[0] def func2(): dist = Distribution('bar') rng = TransformedDensityRejection(dist, domain=(10, 100), random_state=2) try: rng.rvs(100000) except ValueError as e: errors['err2'] = e.args[0] t1 = threading.Thread(target=func1) t2 = threading.Thread(target=func2) t1.start() t2.start() t1.join() t2.join() assert errors['err1'] == 'foo' assert errors['err2'] == 'bar' @pytest.mark.parametrize("method, kwargs", all_methods) def test_pickle(method, kwargs): Method = getattr(stats.sampling, method) rng1 = Method(**kwargs, random_state=123) obj = pickle.dumps(rng1) rng2 = pickle.loads(obj) assert_equal(rng1.rvs(100), rng2.rvs(100)) @pytest.mark.parametrize("size", [None, 0, (0, ), 1, (10, 3), (2, 3, 4, 5), (0, 0), (0, 1)]) def test_rvs_size(size): # As the `rvs` method is present in the base class and shared between # all the classes, we can just test with one of the methods. rng = TransformedDensityRejection(StandardNormal()) if size is None: assert np.isscalar(rng.rvs(size)) else: if np.isscalar(size): size = (size, ) assert rng.rvs(size).shape == size def test_with_scipy_distribution(): # test if the setup works with SciPy's rv_frozen distributions dist = stats.norm() urng = np.random.default_rng(0) rng = NumericalInverseHermite(dist, random_state=urng) u = np.linspace(0, 1, num=100) check_cont_samples(rng, dist, dist.stats()) assert_allclose(dist.ppf(u), rng.ppf(u)) # test if it works with `loc` and `scale` dist = stats.norm(loc=10., scale=5.) 
rng = NumericalInverseHermite(dist, random_state=urng) check_cont_samples(rng, dist, dist.stats()) assert_allclose(dist.ppf(u), rng.ppf(u)) # check for discrete distributions dist = stats.binom(10, 0.2) rng = DiscreteAliasUrn(dist, random_state=urng) domain = dist.support() pv = dist.pmf(np.arange(domain[0], domain[1]+1)) check_discr_samples(rng, pv, dist.stats()) def check_cont_samples(rng, dist, mv_ex): rvs = rng.rvs(100000) mv = rvs.mean(), rvs.var() # test the moments only if the variance is finite if np.isfinite(mv_ex[1]): assert_allclose(mv, mv_ex, rtol=1e-7, atol=1e-1) # Cramer Von Mises test for goodness-of-fit rvs = rng.rvs(500) dist.cdf = np.vectorize(dist.cdf) pval = cramervonmises(rvs, dist.cdf).pvalue assert pval > 0.1 def check_discr_samples(rng, pv, mv_ex): rvs = rng.rvs(100000) # test if the first few moments match mv = rvs.mean(), rvs.var() assert_allclose(mv, mv_ex, rtol=1e-3, atol=1e-1) # normalize pv = pv / pv.sum() # chi-squared test for goodness-of-fit obs_freqs = np.zeros_like(pv) _, freqs = np.unique(rvs, return_counts=True) freqs = freqs / freqs.sum() obs_freqs[:freqs.size] = freqs pval = chisquare(obs_freqs, pv).pvalue assert pval > 0.1 def test_warning_center_not_in_domain(): # UNURAN will warn if the center provided or the one computed w/o the # domain is outside of the domain msg = "102 : center moved into domain of distribution" with pytest.warns(RuntimeWarning, match=msg): NumericalInversePolynomial(StandardNormal(), center=0, domain=(3, 5)) with pytest.warns(RuntimeWarning, match=msg): NumericalInversePolynomial(StandardNormal(), domain=(3, 5)) @pytest.mark.parametrize('method', ["SimpleRatioUniforms", "NumericalInversePolynomial", "TransformedDensityRejection"]) def test_error_mode_not_in_domain(method): # UNURAN raises an error if the mode is not in the domain # the behavior is different compared to the case that center is not in the # domain. 
mode is supposed to be the exact value, center can be an # approximate value Method = getattr(stats.sampling, method) msg = "17 : mode not in domain" with pytest.raises(UNURANError, match=msg): Method(StandardNormal(), mode=0, domain=(3, 5)) @pytest.mark.parametrize('method', ["NumericalInverseHermite", "NumericalInversePolynomial"]) class TestQRVS: def test_input_validation(self, method): match = "`qmc_engine` must be an instance of..." with pytest.raises(ValueError, match=match): Method = getattr(stats.sampling, method) gen = Method(StandardNormal()) gen.qrvs(qmc_engine=0) # issues with QMCEngines and old NumPy Method = getattr(stats.sampling, method) gen = Method(StandardNormal()) match = "`d` must be consistent with dimension of `qmc_engine`." with pytest.raises(ValueError, match=match): gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2)) qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)] # `size=None` should not add anything to the shape, `size=1` should sizes = [(None, tuple()), (1, (1,)), (4, (4,)), ((4,), (4,)), ((2, 4), (2, 4))] # type: ignore # Neither `d=None` nor `d=1` should add anything to the shape ds = [(None, tuple()), (1, tuple()), (3, (3,))] @pytest.mark.parametrize('qrng', qrngs) @pytest.mark.parametrize('size_in, size_out', sizes) @pytest.mark.parametrize('d_in, d_out', ds) def test_QRVS_shape_consistency(self, qrng, size_in, size_out, d_in, d_out, method): w32 = sys.platform == "win32" and platform.architecture()[0] == "32bit" if w32 and method == "NumericalInversePolynomial": pytest.xfail("NumericalInversePolynomial.qrvs fails for Win " "32-bit") dist = StandardNormal() Method = getattr(stats.sampling, method) gen = Method(dist) # If d and qrng.d are inconsistent, an error is raised if d_in is not None and qrng is not None and qrng.d != d_in: match = "`d` must be consistent with dimension of `qmc_engine`." 
with pytest.raises(ValueError, match=match): gen.qrvs(size_in, d=d_in, qmc_engine=qrng) return # Sometimes d is really determined by qrng if d_in is None and qrng is not None and qrng.d != 1: d_out = (qrng.d,) shape_expected = size_out + d_out qrng2 = deepcopy(qrng) qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng) if size_in is not None: assert qrvs.shape == shape_expected if qrng2 is not None: uniform = qrng2.random(np.prod(size_in) or 1) qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected) assert_allclose(qrvs, qrvs2, atol=1e-12) def test_QRVS_size_tuple(self, method): # QMCEngine samples are always of shape (n, d). When `size` is a tuple, # we set `n = prod(size)` in the call to qmc_engine.random, transform # the sample, and reshape it to the final dimensions. When we reshape, # we need to be careful, because the _columns_ of the sample returned # by a QMCEngine are "independent"-ish, but the elements within the # columns are not. We need to make sure that this doesn't get mixed up # by reshaping: qrvs[..., i] should remain "independent"-ish of # qrvs[..., i+1], but the elements within qrvs[..., i] should be # transformed from the same low-discrepancy sequence. 
dist = StandardNormal() Method = getattr(stats.sampling, method) gen = Method(dist) size = (3, 4) d = 5 qrng = stats.qmc.Halton(d, seed=0) qrng2 = stats.qmc.Halton(d, seed=0) uniform = qrng2.random(np.prod(size)) qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng) qrvs2 = stats.norm.ppf(uniform) for i in range(d): sample = qrvs[..., i] sample2 = qrvs2[:, i].reshape(size) assert_allclose(sample, sample2, atol=1e-12) class TestTransformedDensityRejection: # Simple Custom Distribution class dist0: def pdf(self, x): return 3/4 * (1-x*x) def dpdf(self, x): return 3/4 * (-2*x) def cdf(self, x): return 3/4 * (x - x**3/3 + 2/3) def support(self): return -1, 1 # Standard Normal Distribution class dist1: def pdf(self, x): return stats.norm._pdf(x / 0.1) def dpdf(self, x): return -x / 0.01 * stats.norm._pdf(x / 0.1) def cdf(self, x): return stats.norm._cdf(x / 0.1) # pdf with piecewise linear function as transformed density # with T = -1/sqrt with shift. Taken from UNU.RAN test suite # (from file t_tdr_ps.c) class dist2: def __init__(self, shift): self.shift = shift def pdf(self, x): x -= self.shift y = 1. / (abs(x) + 1.) return 0.5 * y * y def dpdf(self, x): x -= self.shift y = 1. / (abs(x) + 1.) y = y * y * y return y if (x < 0.) else -y def cdf(self, x): x -= self.shift if x <= 0.: return 0.5 / (1. - x) else: return 1. - 0.5 / (1. + x) dists = [dist0(), dist1(), dist2(0.), dist2(10000.)] # exact mean and variance of the distributions in the list dists mv0 = [0., 4./15.] 
mv1 = [0., 0.01] mv2 = [0., np.inf] mv3 = [10000., np.inf] mvs = [mv0, mv1, mv2, mv3] @pytest.mark.parametrize("dist, mv_ex", zip(dists, mvs)) def test_basic(self, dist, mv_ex): with suppress_warnings() as sup: # filter the warnings thrown by UNU.RAN sup.filter(RuntimeWarning) rng = TransformedDensityRejection(dist, random_state=42) check_cont_samples(rng, dist, mv_ex) # PDF 0 everywhere => bad construction points bad_pdfs = [(lambda x: 0, UNURANError, r"50 : bad construction points.")] bad_pdfs += bad_pdfs_common # type: ignore[arg-type] @pytest.mark.parametrize("pdf, err, msg", bad_pdfs) def test_bad_pdf(self, pdf, err, msg): class dist: pass dist.pdf = pdf dist.dpdf = lambda x: 1 # an arbitrary dPDF with pytest.raises(err, match=msg): TransformedDensityRejection(dist) @pytest.mark.parametrize("dpdf, err, msg", bad_dpdf_common) def test_bad_dpdf(self, dpdf, err, msg): class dist: pass dist.pdf = lambda x: x dist.dpdf = dpdf with pytest.raises(err, match=msg): TransformedDensityRejection(dist, domain=(1, 10)) # test domains with inf + nan in them. need to write a custom test for # this because not all methods support infinite tails. 
@pytest.mark.parametrize("domain, err, msg", inf_nan_domains) def test_inf_nan_domains(self, domain, err, msg): with pytest.raises(err, match=msg): TransformedDensityRejection(StandardNormal(), domain=domain) @pytest.mark.parametrize("construction_points", [-1, 0, 0.1]) def test_bad_construction_points_scalar(self, construction_points): with pytest.raises(ValueError, match=r"`construction_points` must be " r"a positive integer."): TransformedDensityRejection( StandardNormal(), construction_points=construction_points ) def test_bad_construction_points_array(self): # empty array construction_points = [] with pytest.raises(ValueError, match=r"`construction_points` must " r"either be a " r"scalar or a non-empty array."): TransformedDensityRejection( StandardNormal(), construction_points=construction_points ) # construction_points not monotonically increasing construction_points = [1, 1, 1, 1, 1, 1] with pytest.warns(RuntimeWarning, match=r"33 : starting points not " r"strictly monotonically " r"increasing"): TransformedDensityRejection( StandardNormal(), construction_points=construction_points ) # construction_points containing nans construction_points = [np.nan, np.nan, np.nan] with pytest.raises(UNURANError, match=r"50 : bad construction " r"points."): TransformedDensityRejection( StandardNormal(), construction_points=construction_points ) # construction_points out of domain construction_points = [-10, 10] with pytest.warns(RuntimeWarning, match=r"50 : starting point out of " r"domain"): TransformedDensityRejection( StandardNormal(), domain=(-3, 3), construction_points=construction_points ) @pytest.mark.parametrize("c", [-1., np.nan, np.inf, 0.1, 1.]) def test_bad_c(self, c): msg = r"`c` must either be -0.5 or 0." with pytest.raises(ValueError, match=msg): TransformedDensityRejection(StandardNormal(), c=-1.) 
u = [np.linspace(0, 1, num=1000), [], [[]], [np.nan], [-np.inf, np.nan, np.inf], 0, [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]] @pytest.mark.parametrize("u", u) def test_ppf_hat(self, u): # Increase the `max_squeeze_hat_ratio` so the ppf_hat is more # accurate. rng = TransformedDensityRejection(StandardNormal(), max_squeeze_hat_ratio=0.9999) # Older versions of NumPy throw RuntimeWarnings for comparisons # with nan. with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in greater") sup.filter(RuntimeWarning, "invalid value encountered in " "greater_equal") sup.filter(RuntimeWarning, "invalid value encountered in less") sup.filter(RuntimeWarning, "invalid value encountered in " "less_equal") res = rng.ppf_hat(u) expected = stats.norm.ppf(u) assert_allclose(res, expected, rtol=1e-3, atol=1e-5) assert res.shape == expected.shape def test_bad_dist(self): # Empty distribution class dist: ... msg = r"`pdf` required but not found." with pytest.raises(ValueError, match=msg): TransformedDensityRejection(dist) # dPDF not present in dist class dist: pdf = lambda x: 1-x*x # noqa: E731 msg = r"`dpdf` required but not found." with pytest.raises(ValueError, match=msg): TransformedDensityRejection(dist) class TestDiscreteAliasUrn: # DAU fails on these probably because of large domains and small # computation errors in PMF. Mean/SD match but chi-squared test fails. 
basic_fail_dists = { 'nchypergeom_fisher', # numerical erros on tails 'nchypergeom_wallenius', # numerical erros on tails 'randint' # fails on 32-bit ubuntu } @pytest.mark.parametrize("distname, params", distdiscrete) def test_basic(self, distname, params): if distname in self.basic_fail_dists: msg = ("DAU fails on these probably because of large domains " "and small computation errors in PMF.") pytest.skip(msg) if not isinstance(distname, str): dist = distname else: dist = getattr(stats, distname) dist = dist(*params) domain = dist.support() if not np.isfinite(domain[1] - domain[0]): # DAU only works with finite domain. So, skip the distributions # with infinite tails. pytest.skip("DAU only works with a finite domain.") k = np.arange(domain[0], domain[1]+1) pv = dist.pmf(k) mv_ex = dist.stats('mv') rng = DiscreteAliasUrn(dist, random_state=42) check_discr_samples(rng, pv, mv_ex) # Can't use bad_pmf_common here as we evaluate PMF early on to avoid # unhelpful errors from UNU.RAN. bad_pmf = [ # inf returned (lambda x: np.inf, ValueError, r"must contain only finite / non-nan values"), # nan returned (lambda x: np.nan, ValueError, r"must contain only finite / non-nan values"), # all zeros (lambda x: 0.0, ValueError, r"must contain at least one non-zero value"), # Undefined name inside the function (lambda x: foo, NameError, # type: ignore[name-defined] # noqa r"name 'foo' is not defined"), # Returning wrong type. 
(lambda x: [], ValueError, r"setting an array element with a sequence."), # probabilities < 0 (lambda x: -x, UNURANError, r"50 : probability < 0"), # signature of PMF wrong (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given") ] @pytest.mark.parametrize("pmf, err, msg", bad_pmf) def test_bad_pmf(self, pmf, err, msg): class dist: pass dist.pmf = pmf with pytest.raises(err, match=msg): DiscreteAliasUrn(dist, domain=(1, 10)) @pytest.mark.parametrize("pv", [[0.18, 0.02, 0.8], [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]]) def test_sampling_with_pv(self, pv): pv = np.asarray(pv, dtype=np.float64) rng = DiscreteAliasUrn(pv, random_state=123) rvs = rng.rvs(100_000) pv = pv / pv.sum() variates = np.arange(0, len(pv)) # test if the first few moments match m_expected = np.average(variates, weights=pv) v_expected = np.average((variates - m_expected) ** 2, weights=pv) mv_expected = m_expected, v_expected check_discr_samples(rng, pv, mv_expected) @pytest.mark.parametrize("pv, msg", bad_pv_common) def test_bad_pv(self, pv, msg): with pytest.raises(ValueError, match=msg): DiscreteAliasUrn(pv) # DAU doesn't support infinite tails. So, it should throw an error when # inf is present in the domain. 
inf_domain = [(-np.inf, np.inf), (np.inf, np.inf), (-np.inf, -np.inf), (0, np.inf), (-np.inf, 0)] @pytest.mark.parametrize("domain", inf_domain) def test_inf_domain(self, domain): with pytest.raises(ValueError, match=r"must be finite"): DiscreteAliasUrn(stats.binom(10, 0.2), domain=domain) def test_bad_urn_factor(self): with pytest.warns(RuntimeWarning, match=r"relative urn size < 1."): DiscreteAliasUrn([0.5, 0.5], urn_factor=-1) def test_bad_args(self): msg = (r"`domain` must be provided when the " r"probability vector is not available.") class dist: def pmf(self, x): return x with pytest.raises(ValueError, match=msg): DiscreteAliasUrn(dist) class TestNumericalInversePolynomial: # Simple Custom Distribution class dist0: def pdf(self, x): return 3/4 * (1-x*x) def cdf(self, x): return 3/4 * (x - x**3/3 + 2/3) def support(self): return -1, 1 # Standard Normal Distribution class dist1: def pdf(self, x): return stats.norm._pdf(x / 0.1) def cdf(self, x): return stats.norm._cdf(x / 0.1) # Sin 2 distribution # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 1 # f(x) = < # \ 0 otherwise # Taken from UNU.RAN test suite (from file t_pinv.c) class dist2: def pdf(self, x): return 0.05 + 0.45 * (1 + np.sin(2*np.pi*x)) def cdf(self, x): return (0.05*(x + 1) + 0.9*(1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) / (4.*np.pi)) def support(self): return -1, 1 # Sin 10 distribution # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 5 # f(x) = < # \ 0 otherwise # Taken from UNU.RAN test suite (from file t_pinv.c) class dist3: def pdf(self, x): return 0.2 * (0.05 + 0.45 * (1 + np.sin(2*np.pi*x))) def cdf(self, x): return x/10. + 0.5 + 0.09/(2*np.pi) * (np.cos(10*np.pi) - np.cos(2*np.pi*x)) def support(self): return -5, 5 dists = [dist0(), dist1(), dist2(), dist3()] # exact mean and variance of the distributions in the list dists mv0 = [0., 4./15.] 
mv1 = [0., 0.01] mv2 = [-0.45/np.pi, 2/3*0.5 - 0.45**2/np.pi**2] mv3 = [-0.45/np.pi, 0.2 * 250/3 * 0.5 - 0.45**2/np.pi**2] mvs = [mv0, mv1, mv2, mv3] @pytest.mark.parametrize("dist, mv_ex", zip(dists, mvs)) def test_basic(self, dist, mv_ex): rng = NumericalInversePolynomial(dist, random_state=42) check_cont_samples(rng, dist, mv_ex) very_slow_dists = ['studentized_range', 'trapezoid', 'triang', 'vonmises', 'levy_stable', 'kappa4', 'ksone', 'kstwo', 'levy_l', 'gausshyper', 'anglit'] # for these distributions, some assertions fail due to minor # numerical differences. They can be avoided either by changing # the seed or by increasing the u_resolution. fail_dists = ['ncf', 'pareto', 'chi2', 'fatiguelife', 'halfgennorm', 'gibrat', 'lognorm', 'ncx2', 't'] @pytest.mark.xslow @pytest.mark.parametrize("distname, params", distcont) def test_basic_all_scipy_dists(self, distname, params): if distname in self.very_slow_dists: pytest.skip(f"PINV too slow for {distname}") if distname in self.fail_dists: pytest.skip(f"PINV fails for {distname}") dist = (getattr(stats, distname) if isinstance(distname, str) else distname) dist = dist(*params) with suppress_warnings() as sup: sup.filter(RuntimeWarning) rng = NumericalInversePolynomial(dist, random_state=42) check_cont_samples(rng, dist, [dist.mean(), dist.var()]) @pytest.mark.parametrize("pdf, err, msg", bad_pdfs_common) def test_bad_pdf(self, pdf, err, msg): class dist: pass dist.pdf = pdf with pytest.raises(err, match=msg): NumericalInversePolynomial(dist, domain=[0, 5]) @pytest.mark.parametrize("logpdf, err, msg", bad_logpdfs_common) def test_bad_logpdf(self, logpdf, err, msg): class dist: pass dist.logpdf = logpdf with pytest.raises(err, match=msg): NumericalInversePolynomial(dist, domain=[0, 5]) # test domains with inf + nan in them. need to write a custom test for # this because not all methods support infinite tails. 
@pytest.mark.parametrize("domain, err, msg", inf_nan_domains) def test_inf_nan_domains(self, domain, err, msg): with pytest.raises(err, match=msg): NumericalInversePolynomial(StandardNormal(), domain=domain) u = [ # test if quantile 0 and 1 return -inf and inf respectively and check # the correctness of the PPF for equidistant points between 0 and 1. np.linspace(0, 1, num=10000), # test the PPF method for empty arrays [], [[]], # test if nans and infs return nan result. [np.nan], [-np.inf, np.nan, np.inf], # test if a scalar is returned for a scalar input. 0, # test for arrays with nans, values greater than 1 and less than 0, # and some valid values. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]] ] @pytest.mark.parametrize("u", u) def test_ppf(self, u): dist = StandardNormal() rng = NumericalInversePolynomial(dist, u_resolution=1e-14) # Older versions of NumPy throw RuntimeWarnings for comparisons # with nan. with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in greater") sup.filter(RuntimeWarning, "invalid value encountered in " "greater_equal") sup.filter(RuntimeWarning, "invalid value encountered in less") sup.filter(RuntimeWarning, "invalid value encountered in " "less_equal") res = rng.ppf(u) expected = stats.norm.ppf(u) assert_allclose(res, expected, rtol=1e-11, atol=1e-11) assert res.shape == expected.shape x = [np.linspace(-10, 10, num=10000), [], [[]], [np.nan], [-np.inf, np.nan, np.inf], 0, [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-np.inf, 3, 4]]] @pytest.mark.parametrize("x", x) def test_cdf(self, x): dist = StandardNormal() rng = NumericalInversePolynomial(dist, u_resolution=1e-14) # Older versions of NumPy throw RuntimeWarnings for comparisons # with nan. 
with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in greater") sup.filter(RuntimeWarning, "invalid value encountered in " "greater_equal") sup.filter(RuntimeWarning, "invalid value encountered in less") sup.filter(RuntimeWarning, "invalid value encountered in " "less_equal") res = rng.cdf(x) expected = stats.norm.cdf(x) assert_allclose(res, expected, rtol=1e-11, atol=1e-11) assert res.shape == expected.shape def test_u_error(self): dist = StandardNormal() rng = NumericalInversePolynomial(dist, u_resolution=1e-10) max_error, mae = rng.u_error() assert max_error < 1e-10 assert mae <= max_error rng = NumericalInversePolynomial(dist, u_resolution=1e-14) max_error, mae = rng.u_error() assert max_error < 1e-14 assert mae <= max_error bad_orders = [1, 4.5, 20, np.inf, np.nan] bad_u_resolution = [1e-20, 1e-1, np.inf, np.nan] @pytest.mark.parametrize("order", bad_orders) def test_bad_orders(self, order): dist = StandardNormal() msg = r"`order` must be an integer in the range \[3, 17\]." with pytest.raises(ValueError, match=msg): NumericalInversePolynomial(dist, order=order) @pytest.mark.parametrize("u_resolution", bad_u_resolution) def test_bad_u_resolution(self, u_resolution): msg = r"`u_resolution` must be between 1e-15 and 1e-5." with pytest.raises(ValueError, match=msg): NumericalInversePolynomial(StandardNormal(), u_resolution=u_resolution) def test_bad_args(self): class BadDist: def cdf(self, x): return stats.norm._cdf(x) dist = BadDist() msg = r"Either of the methods `pdf` or `logpdf` must be specified" with pytest.raises(ValueError, match=msg): rng = NumericalInversePolynomial(dist) dist = StandardNormal() rng = NumericalInversePolynomial(dist) msg = r"`sample_size` must be greater than or equal to 1000." with pytest.raises(ValueError, match=msg): rng.u_error(10) class Distribution: def pdf(self, x): return np.exp(-0.5 * x*x) dist = Distribution() rng = NumericalInversePolynomial(dist) msg = r"Exact CDF required but not found." 
class TestNumericalInverseHermite:
    """Tests for the `NumericalInverseHermite` generator."""

    #         /  (1 + sin(2 Pi x))/2  if |x| <= 1
    # f(x) = <
    #         \  0        otherwise
    # Taken from UNU.RAN test suite (from file t_hinv.c)
    class dist0:
        # NOTE: the formulas below are not clipped outside [-1, 1];
        # `support` reports that interval.
        def pdf(self, x):
            return 0.5*(1. + np.sin(2.*np.pi*x))

        def dpdf(self, x):
            return np.pi*np.cos(2.*np.pi*x)

        def cdf(self, x):
            return (1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) / (4.*np.pi)

        def support(self):
            return -1, 1

    #         /  Max(sin(2 Pi x), 0) Pi/2  if -1 < x < 0.5
    # f(x) = <
    #         \  0        otherwise
    # Taken from UNU.RAN test suite (from file t_hinv.c)
    class dist1:
        # Every method takes a scalar `x` and returns an explicit value on
        # both sides of the support [-1, 0.5], so no call falls through and
        # returns None.
        def pdf(self, x):
            if (x < -1.):
                return 0.
            if (x <= -0.5):
                return np.sin((2. * np.pi) * x) * 0.5 * np.pi
            if (x < 0.):
                return 0.
            if (x <= 0.5):
                return np.sin((2. * np.pi) * x) * 0.5 * np.pi
            return 0.

        def dpdf(self, x):
            if (x < -1.):
                return 0.
            if (x <= -0.5):
                return np.cos((2. * np.pi) * x) * np.pi * np.pi
            if (x < 0.):
                return 0.
            if (x <= 0.5):
                return np.cos((2. * np.pi) * x) * np.pi * np.pi
            return 0.

        def cdf(self, x):
            if (x < -1.):
                return 0.
            if (x <= -0.5):
                return 0.25 * (1 - np.cos((2. * np.pi) * x))
            if (x < 0.):
                # the density is zero on (-0.5, 0), so the CDF is flat here
                return 0.5
            if (x <= 0.5):
                return 0.75 - 0.25 * np.cos((2. * np.pi) * x)
            return 1.

        def support(self):
            return -1, 0.5

    dists = [dist0(), dist1()]

    # exact mean and variance of the distributions in the list dists
    mv0 = [-1/(2*np.pi), 1/3 - 1/(4*np.pi*np.pi)]
    mv1 = [-1/4, 3/8-1/(2*np.pi*np.pi) - 1/16]
    mvs = [mv0, mv1]

    @pytest.mark.parametrize("dist, mv_ex",
                             zip(dists, mvs))
    @pytest.mark.parametrize("order", [3, 5])
    def test_basic(self, dist, mv_ex, order):
        """Check samples of each test distribution for orders 3 and 5."""
        rng = NumericalInverseHermite(dist, order=order, random_state=42)
        check_cont_samples(rng, dist, mv_ex)

    # test domains with inf + nan in them. need to write a custom test for
    # this because not all methods support infinite tails.
    @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
    def test_inf_nan_domains(self, domain, err, msg):
        """Each invalid `domain` must raise `err` with a matching message."""
        with pytest.raises(err, match=msg):
            NumericalInverseHermite(StandardNormal(), domain=domain)

    def basic_test_all_scipy_dists(self, distname, shapes):
        """Compare the approximate PPF with that of a SciPy distribution.

        Parameters
        ----------
        distname : str
            Name of a distribution looked up on `scipy.stats`.
        shapes : tuple
            Shape parameters used to freeze the distribution.

        The test is skipped for distributions listed as slow and marked as
        an expected failure for those listed as failing.
        """
        slow_dists = {'ksone', 'kstwo', 'levy_stable', 'skewnorm'}
        fail_dists = {'beta', 'gausshyper', 'geninvgauss', 'ncf', 'nct',
                      'norminvgauss', 'genhyperbolic', 'studentized_range',
                      'vonmises', 'kappa4', 'invgauss', 'wald'}

        if distname in slow_dists:
            pytest.skip("Distribution is too slow")
        if distname in fail_dists:
            # specific reasons documented in gh-13319
            # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
            pytest.xfail("Fails - usually due to inaccurate CDF/PDF")

        # Use a private, seeded RandomState instead of np.random.seed so
        # that the process-wide NumPy RNG state is left untouched.
        urng = np.random.RandomState(0)

        dist = getattr(stats, distname)(*shapes)
        fni = NumericalInverseHermite(dist)

        x = urng.rand(10)
        exact = dist.ppf(x)
        approx = fni.ppf(x)
        # relative error of the quantiles (x-error) ...
        p_tol = np.max(np.abs(exact - approx)/np.abs(exact))
        # ... and error after mapping them back through the CDF (u-error)
        u_tol = np.max(np.abs(dist.cdf(approx) - x))

        assert p_tol < 1e-8
        assert u_tol < 1e-12

    @pytest.mark.filterwarnings('ignore::RuntimeWarning')
    @pytest.mark.xslow
    @pytest.mark.parametrize(("distname", "shapes"), distcont)
    def test_basic_all_scipy_dists(self, distname, shapes):
        """Run the basic PPF check for every entry of `distcont`."""
        # "truncnorm" is not skipped here even though
        # test_basic_truncnorm_gh17155 also exercises it.
        self.basic_test_all_scipy_dists(distname, shapes)

    @pytest.mark.filterwarnings('ignore::RuntimeWarning')
    def test_basic_truncnorm_gh17155(self):
        """Run the basic PPF check for truncnorm with shapes (0.1, 2)."""
        self.basic_test_all_scipy_dists("truncnorm", (0.1, 2))

    def test_input_validation(self):
        """Invalid `order`, `dist` and `u_resolution` raise ValueError."""
        match = r"`order` must be either 1, 3, or 5."
        with pytest.raises(ValueError, match=match):
            NumericalInverseHermite(StandardNormal(), order=2)

        match = "`cdf` required but not found"
        with pytest.raises(ValueError, match=match):
            NumericalInverseHermite("norm")

        match = "could not convert string to float"
        with pytest.raises(ValueError, match=match):
            NumericalInverseHermite(StandardNormal(),
                                    u_resolution='ekki')

    rngs = [None, 0, np.random.RandomState(0)]
    rngs.append(np.random.default_rng(0))  # type: ignore
    sizes = [(None, tuple()), (8, (8,)), ((4, 5, 6), (4, 5, 6))]

    @pytest.mark.parametrize('rng', rngs)
    @pytest.mark.parametrize('size_in, size_out', sizes)
    def test_RVS(self, rng, size_in, size_out):
        """Check the shape of `rvs` and its values for a given `random_state`."""
        dist = StandardNormal()
        fni = NumericalInverseHermite(dist)

        # copy the generator before it is consumed so that the same uniform
        # stream can be replayed below
        rng2 = deepcopy(rng)
        rvs = fni.rvs(size=size_in, random_state=rng)
        if size_in is not None:
            assert rvs.shape == size_out

        if rng2 is not None:
            rng2 = check_random_state(rng2)
            uniform = rng2.uniform(size=size_in)
            rvs2 = stats.norm.ppf(uniform)
            assert_allclose(rvs, rvs2)

    def test_inaccurate_CDF(self):
        """A CDF with an inaccurate tail warns at the default resolution."""
        # CDF function with inaccurate tail cannot be inverted; see gh-13319
        # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
        shapes = (2.3098496451481823, 0.6268795430096368)
        match = ("98 : one or more intervals very short; possibly due to "
                 "numerical problems with a pole or very flat tail")

        # fails with default tol
        with pytest.warns(RuntimeWarning, match=match):
            NumericalInverseHermite(stats.beta(*shapes))

        # no error with coarser tol
        NumericalInverseHermite(stats.beta(*shapes), u_resolution=1e-8)

    def test_custom_distribution(self):
        """A custom normal and `stats.norm()` give the same variate."""
        dist1 = StandardNormal()
        fni1 = NumericalInverseHermite(dist1)

        dist2 = stats.norm()
        fni2 = NumericalInverseHermite(dist2)

        assert_allclose(fni1.rvs(random_state=0), fni2.rvs(random_state=0))

    u = [
        # check the correctness of the PPF for equidistant points between
        # 0 and 1 (both end points included).
        np.linspace(0., 1., num=10000),
        # test the PPF method for empty arrays
        [], [[]],
        # test if nans and infs return nan result.
        [np.nan], [-np.inf, np.nan, np.inf],
        # test if a scalar is returned for a scalar input.
        0,
        # test for arrays with nans, values greater than 1 and less than 0,
        # and some valid values.
        [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
    ]

    @pytest.mark.parametrize("u", u)
    def test_ppf(self, u):
        """The approximate PPF must agree with `stats.norm.ppf`."""
        dist = StandardNormal()
        rng = NumericalInverseHermite(dist, u_resolution=1e-12)
        # Older versions of NumPy throw RuntimeWarnings for comparisons
        # with nan.
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in greater")
            sup.filter(RuntimeWarning, "invalid value encountered in "
                                       "greater_equal")
            sup.filter(RuntimeWarning, "invalid value encountered in less")
            sup.filter(RuntimeWarning, "invalid value encountered in "
                                       "less_equal")
            res = rng.ppf(u)
            expected = stats.norm.ppf(u)
        assert_allclose(res, expected, rtol=1e-9, atol=3e-10)
        assert res.shape == expected.shape

    def test_u_error(self):
        """The estimated u-error stays below the requested `u_resolution`."""
        dist = StandardNormal()
        rng = NumericalInverseHermite(dist, u_resolution=1e-10)
        max_error, mae = rng.u_error()
        assert max_error < 1e-10
        assert mae <= max_error
        with suppress_warnings() as sup:
            # ignore warning about u-resolution being too small.
            sup.filter(RuntimeWarning)
            rng = NumericalInverseHermite(dist, u_resolution=1e-14)
        max_error, mae = rng.u_error()
        assert max_error < 1e-14
        assert mae <= max_error
class TestSimpleRatioUniforms:
    """Tests for the `SimpleRatioUniforms` generator."""

    # pdf with piecewise linear function as transformed density
    # with T = -1/sqrt with shift. Taken from UNU.RAN test suite
    # (from file t_srou.c)
    class dist:
        def __init__(self, shift):
            # the density is centred at `shift`, which is also stored as
            # its mode
            self.shift = shift
            self.mode = shift

        def pdf(self, x):
            # Rebind instead of using `x -= ...`: the in-place form would
            # modify an ndarray owned by the caller.
            x = x - self.shift
            y = 1. / (abs(x) + 1.)
            return 0.5 * y * y

        def cdf(self, x):
            # Same as in `pdf`: never mutate the caller's argument.
            x = x - self.shift
            if x <= 0.:
                return 0.5 / (1. - x)
            else:
                return 1. - 0.5 / (1. + x)

    dists = [dist(0.), dist(10000.)]

    # exact mean and variance of the distributions in the list dists
    mv1 = [0., np.inf]
    mv2 = [10000., np.inf]
    mvs = [mv1, mv2]

    @pytest.mark.parametrize("dist, mv_ex",
                             zip(dists, mvs))
    def test_basic(self, dist, mv_ex):
        """Check samples generated with and without `cdf_at_mode`."""
        rng = SimpleRatioUniforms(dist, mode=dist.mode, random_state=42)
        check_cont_samples(rng, dist, mv_ex)
        rng = SimpleRatioUniforms(dist, mode=dist.mode,
                                  cdf_at_mode=dist.cdf(dist.mode),
                                  random_state=42)
        check_cont_samples(rng, dist, mv_ex)

    # test domains with inf + nan in them. need to write a custom test for
    # this because not all methods support infinite tails.
    @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
    def test_inf_nan_domains(self, domain, err, msg):
        """Each invalid `domain` must raise `err` with a matching message."""
        with pytest.raises(err, match=msg):
            SimpleRatioUniforms(StandardNormal(), domain=domain)

    def test_bad_args(self):
        """A negative `pdf_area` must be rejected with a ValueError."""
        # pdf_area < 0
        with pytest.raises(ValueError, match=r"`pdf_area` must be > 0"):
            SimpleRatioUniforms(StandardNormal(), mode=0, pdf_area=-1)