import pytest import warnings import numpy as np from numpy.testing import (assert_array_equal, assert_allclose, suppress_warnings) from copy import deepcopy from scipy.stats.sampling import FastGeneratorInversion from scipy import stats def test_bad_args(): # loc and scale must be scalar with pytest.raises(ValueError, match="loc must be scalar"): FastGeneratorInversion(stats.norm(loc=(1.2, 1.3))) with pytest.raises(ValueError, match="scale must be scalar"): FastGeneratorInversion(stats.norm(scale=[1.5, 5.7])) with pytest.raises(ValueError, match="'test' cannot be used to seed"): FastGeneratorInversion(stats.norm(), random_state="test") msg = "Each of the 1 shape parameters must be a scalar" with pytest.raises(ValueError, match=msg): FastGeneratorInversion(stats.gamma([1.3, 2.5])) with pytest.raises(ValueError, match="`dist` must be a frozen"): FastGeneratorInversion("xy") with pytest.raises(ValueError, match="Distribution 'truncnorm' is not"): FastGeneratorInversion(stats.truncnorm(1.3, 4.5)) def test_random_state(): # fixed seed gen = FastGeneratorInversion(stats.norm(), random_state=68734509) x1 = gen.rvs(size=10) gen.random_state = 68734509 x2 = gen.rvs(size=10) assert_array_equal(x1, x2) # Generator urng = np.random.default_rng(20375857) gen = FastGeneratorInversion(stats.norm(), random_state=urng) x1 = gen.rvs(size=10) gen.random_state = np.random.default_rng(20375857) x2 = gen.rvs(size=10) assert_array_equal(x1, x2) # RandomState urng = np.random.RandomState(2364) gen = FastGeneratorInversion(stats.norm(), random_state=urng) x1 = gen.rvs(size=10) gen.random_state = np.random.RandomState(2364) x2 = gen.rvs(size=10) assert_array_equal(x1, x2) # if evaluate_error is called, it must not interfere with the random_state # used by rvs gen = FastGeneratorInversion(stats.norm(), random_state=68734509) x1 = gen.rvs(size=10) _ = gen.evaluate_error(size=5) # this will generate 5 uniform rvs x2 = gen.rvs(size=10) gen.random_state = 68734509 x3 = gen.rvs(size=20) assert_array_equal(x2, x3[10:]) dists_with_params = [ ("alpha", (3.5,)), ("anglit", ()), ("argus", (3.5,)), ("argus", (5.1,)), ("beta", (1.5, 0.9)), ("cosine", ()), ("betaprime", (2.5, 3.3)), ("bradford", (1.2,)), ("burr", (1.3, 2.4)), ("burr12", (0.7, 1.2)), ("cauchy", ()), ("chi2", (3.5,)), ("chi", (4.5,)), ("crystalball", (0.7, 1.2)), ("expon", ()), ("gamma", (1.5,)), ("gennorm", (2.7,)), ("gumbel_l", ()), ("gumbel_r", ()), ("hypsecant", ()), ("invgauss", (3.1,)), ("invweibull", (1.5,)), ("laplace", ()), ("logistic", ()), ("maxwell", ()), ("moyal", ()), ("norm", ()), ("pareto", (1.3,)), ("powerlaw", (7.6,)), ("rayleigh", ()), ("semicircular", ()), ("t", (5.7,)), ("wald", ()), ("weibull_max", (2.4,)), ("weibull_min", (1.2,)), ] @pytest.mark.parametrize(("distname, args"), dists_with_params) def test_rvs_and_ppf(distname, args): # check sample against rvs generated by rv_continuous urng = np.random.default_rng(9807324628097097) rng1 = getattr(stats, distname)(*args) rvs1 = rng1.rvs(size=500, random_state=urng) rng2 = FastGeneratorInversion(rng1, random_state=urng) rvs2 = rng2.rvs(size=500) assert stats.cramervonmises_2samp(rvs1, rvs2).pvalue > 0.01 # check ppf q = [0.001, 0.1, 0.5, 0.9, 0.999] assert_allclose(rng1.ppf(q), rng2.ppf(q), atol=1e-10) @pytest.mark.parametrize(("distname, args"), dists_with_params) def test_u_error(distname, args): # check sample against rvs generated by rv_continuous dist = getattr(stats, distname)(*args) with suppress_warnings() as sup: # filter the warnings thrown by UNU.RAN sup.filter(RuntimeWarning) rng = FastGeneratorInversion(dist) u_error, x_error = rng.evaluate_error( size=10_000, random_state=9807324628097097, x_error=False ) assert u_error <= 1e-10 @pytest.mark.xfail(reason="geninvgauss CDF is not accurate") def test_geninvgauss_uerror(): dist = stats.geninvgauss(3.2, 1.5) rng = FastGeneratorInversion(dist) err = rng.evaluate_error(size=10_000, random_state=67982) assert err[0] < 1e-10 # TODO: add more distributions @pytest.mark.parametrize(("distname, args"), [("beta", (0.11, 0.11))]) def test_error_extreme_params(distname, args): # take extreme parameters where u-error might not be below the tolerance # due to limitations of floating point arithmetic with suppress_warnings() as sup: # filter the warnings thrown by UNU.RAN for such extreme parameters sup.filter(RuntimeWarning) dist = getattr(stats, distname)(*args) rng = FastGeneratorInversion(dist) u_error, x_error = rng.evaluate_error( size=10_000, random_state=980732462809709732623, x_error=True ) if u_error >= 2.5 * 1e-10: assert x_error < 1e-9 def test_evaluate_error_inputs(): gen = FastGeneratorInversion(stats.norm()) with pytest.raises(ValueError, match="size must be an integer"): gen.evaluate_error(size=3.5) with pytest.raises(ValueError, match="size must be an integer"): gen.evaluate_error(size=(3, 3)) def test_rvs_ppf_loc_scale(): loc, scale = 3.5, 2.3 dist = stats.norm(loc=loc, scale=scale) rng = FastGeneratorInversion(dist, random_state=1234) r = rng.rvs(size=1000) r_rescaled = (r - loc) / scale assert stats.cramervonmises(r_rescaled, "norm").pvalue > 0.01 q = [0.001, 0.1, 0.5, 0.9, 0.999] assert_allclose(rng._ppf(q), rng.ppf(q), atol=1e-10) def test_domain(): # only a basic check that the domain argument is passed to the # UNU.RAN generators rng = FastGeneratorInversion(stats.norm(), domain=(-1, 1)) r = rng.rvs(size=100) assert -1 <= r.min() < r.max() <= 1 # if loc and scale are used, new domain is loc + scale*domain loc, scale = 3.5, 1.3 dist = stats.norm(loc=loc, scale=scale) rng = FastGeneratorInversion(dist, domain=(-1.5, 2)) r = rng.rvs(size=100) lb, ub = loc - scale * 1.5, loc + scale * 2 assert lb <= r.min() < r.max() <= ub @pytest.mark.parametrize(("distname, args, expected"), [("beta", (3.5, 2.5), (0, 1)), ("norm", (), (-np.inf, np.inf))]) def test_support(distname, args, expected): # test that the support is updated if truncation and loc/scale are applied # use beta distribution since it is a transformed betaprime distribution, # so it is important that the correct support is considered # (i.e., the support of beta is (0,1), while betaprime is (0, inf)) dist = getattr(stats, distname)(*args) rng = FastGeneratorInversion(dist) assert_array_equal(rng.support(), expected) rng.loc = 1 rng.scale = 2 assert_array_equal(rng.support(), 1 + 2*np.array(expected)) @pytest.mark.parametrize(("distname, args"), [("beta", (3.5, 2.5)), ("norm", ())]) def test_support_truncation(distname, args): # similar test for truncation dist = getattr(stats, distname)(*args) rng = FastGeneratorInversion(dist, domain=(0.5, 0.7)) assert_array_equal(rng.support(), (0.5, 0.7)) rng.loc = 1 rng.scale = 2 assert_array_equal(rng.support(), (1 + 2 * 0.5, 1 + 2 * 0.7)) def test_domain_shift_truncation(): # center of norm is zero, it should be shifted to the left endpoint of # domain. if this was not the case, PINV in UNURAN would raise a warning # as the center is not inside the domain with warnings.catch_warnings(): warnings.simplefilter("error") rng = FastGeneratorInversion(stats.norm(), domain=(1, 2)) r = rng.rvs(size=100) assert 1 <= r.min() < r.max() <= 2 def test_non_rvs_methods_with_domain(): # as a first step, compare truncated normal against stats.truncnorm rng = FastGeneratorInversion(stats.norm(), domain=(2.3, 3.2)) trunc_norm = stats.truncnorm(2.3, 3.2) # take values that are inside and outside the domain x = (2.0, 2.4, 3.0, 3.4) p = (0.01, 0.5, 0.99) assert_allclose(rng._cdf(x), trunc_norm.cdf(x)) assert_allclose(rng._ppf(p), trunc_norm.ppf(p)) loc, scale = 2, 3 rng.loc = 2 rng.scale = 3 trunc_norm = stats.truncnorm(2.3, 3.2, loc=loc, scale=scale) x = np.array(x) * scale + loc assert_allclose(rng._cdf(x), trunc_norm.cdf(x)) assert_allclose(rng._ppf(p), trunc_norm.ppf(p)) # do another sanity check with beta distribution # in that case, it is important to use the correct domain since beta # is a transformation of betaprime which has a different support rng = FastGeneratorInversion(stats.beta(2.5, 3.5), domain=(0.3, 0.7)) rng.loc = 2 rng.scale = 2.5 # the support is 2.75, , 3.75 (2 + 2.5 * 0.3, 2 + 2.5 * 0.7) assert_array_equal(rng.support(), (2.75, 3.75)) x = np.array([2.74, 2.76, 3.74, 3.76]) # the cdf needs to be zero outside of the domain y_cdf = rng._cdf(x) assert_array_equal((y_cdf[0], y_cdf[3]), (0, 1)) assert np.min(y_cdf[1:3]) > 0 # ppf needs to map 0 and 1 to the boundaries assert_allclose(rng._ppf(y_cdf), (2.75, 2.76, 3.74, 3.75)) def test_non_rvs_methods_without_domain(): norm_dist = stats.norm() rng = FastGeneratorInversion(norm_dist) x = np.linspace(-3, 3, num=10) p = (0.01, 0.5, 0.99) assert_allclose(rng._cdf(x), norm_dist.cdf(x)) assert_allclose(rng._ppf(p), norm_dist.ppf(p)) loc, scale = 0.5, 1.3 rng.loc = loc rng.scale = scale norm_dist = stats.norm(loc=loc, scale=scale) assert_allclose(rng._cdf(x), norm_dist.cdf(x)) assert_allclose(rng._ppf(p), norm_dist.ppf(p)) @pytest.mark.parametrize(("domain, x"), [(None, 0.5), ((0, 1), 0.5), ((0, 1), 1.5)]) def test_scalar_inputs(domain, x): """ pdf, cdf etc should map scalar values to scalars. check with and w/o domain since domain impacts pdf, cdf etc Take x inside and outside of domain """ rng = FastGeneratorInversion(stats.norm(), domain=domain) assert np.isscalar(rng._cdf(x)) assert np.isscalar(rng._ppf(0.5)) def test_domain_argus_large_chi(): # for large chi, the Gamma distribution is used and the domain has to be # transformed. this is a test to ensure that the transformation works chi, lb, ub = 5.5, 0.25, 0.75 rng = FastGeneratorInversion(stats.argus(chi), domain=(lb, ub)) rng.random_state = 4574 r = rng.rvs(size=500) assert lb <= r.min() < r.max() <= ub # perform goodness of fit test with conditional cdf cdf = stats.argus(chi).cdf prob = cdf(ub) - cdf(lb) assert stats.cramervonmises(r, lambda x: cdf(x) / prob).pvalue > 0.05 def test_setting_loc_scale(): rng = FastGeneratorInversion(stats.norm(), random_state=765765864) r1 = rng.rvs(size=1000) rng.loc = 3.0 rng.scale = 2.5 r2 = rng.rvs(1000) # rescaled r2 should be again standard normal assert stats.cramervonmises_2samp(r1, (r2 - 3) / 2.5).pvalue > 0.05 # reset values to default loc=0, scale=1 rng.loc = 0 rng.scale = 1 r2 = rng.rvs(1000) assert stats.cramervonmises_2samp(r1, r2).pvalue > 0.05 def test_ignore_shape_range(): msg = "No generator is defined for the shape parameters" with pytest.raises(ValueError, match=msg): rng = FastGeneratorInversion(stats.t(0.03)) rng = FastGeneratorInversion(stats.t(0.03), ignore_shape_range=True) # we can ignore the recommended range of shape parameters # but u-error can be expected to be too large in that case u_err, _ = rng.evaluate_error(size=1000, random_state=234) assert u_err >= 1e-6 @pytest.mark.xfail_on_32bit( "NumericalInversePolynomial.qrvs fails for Win 32-bit" ) class TestQRVS: def test_input_validation(self): gen = FastGeneratorInversion(stats.norm()) match = "`qmc_engine` must be an instance of..." with pytest.raises(ValueError, match=match): gen.qrvs(qmc_engine=0) match = "`d` must be consistent with dimension of `qmc_engine`." with pytest.raises(ValueError, match=match): gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2)) qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)] # `size=None` should not add anything to the shape, `size=1` should sizes = [ (None, tuple()), (1, (1,)), (4, (4,)), ((4,), (4,)), ((2, 4), (2, 4)), ] # Neither `d=None` nor `d=1` should add anything to the shape ds = [(None, tuple()), (1, tuple()), (3, (3,))] @pytest.mark.parametrize("qrng", qrngs) @pytest.mark.parametrize("size_in, size_out", sizes) @pytest.mark.parametrize("d_in, d_out", ds) def test_QRVS_shape_consistency(self, qrng, size_in, size_out, d_in, d_out): gen = FastGeneratorInversion(stats.norm()) # If d and qrng.d are inconsistent, an error is raised if d_in is not None and qrng is not None and qrng.d != d_in: match = "`d` must be consistent with dimension of `qmc_engine`." with pytest.raises(ValueError, match=match): gen.qrvs(size_in, d=d_in, qmc_engine=qrng) return # Sometimes d is really determined by qrng if d_in is None and qrng is not None and qrng.d != 1: d_out = (qrng.d,) shape_expected = size_out + d_out qrng2 = deepcopy(qrng) qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng) if size_in is not None: assert qrvs.shape == shape_expected if qrng2 is not None: uniform = qrng2.random(np.prod(size_in) or 1) qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected) assert_allclose(qrvs, qrvs2, atol=1e-12) def test_QRVS_size_tuple(self): # QMCEngine samples are always of shape (n, d). When `size` is a tuple, # we set `n = prod(size)` in the call to qmc_engine.random, transform # the sample, and reshape it to the final dimensions. When we reshape, # we need to be careful, because the _columns_ of the sample returned # by a QMCEngine are "independent"-ish, but the elements within the # columns are not. We need to make sure that this doesn't get mixed up # by reshaping: qrvs[..., i] should remain "independent"-ish of # qrvs[..., i+1], but the elements within qrvs[..., i] should be # transformed from the same low-discrepancy sequence. gen = FastGeneratorInversion(stats.norm()) size = (3, 4) d = 5 qrng = stats.qmc.Halton(d, seed=0) qrng2 = stats.qmc.Halton(d, seed=0) uniform = qrng2.random(np.prod(size)) qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng) qrvs2 = stats.norm.ppf(uniform) for i in range(d): sample = qrvs[..., i] sample2 = qrvs2[:, i].reshape(size) assert_allclose(sample, sample2, atol=1e-12) def test_burr_overflow(): # this case leads to an overflow error if math.exp is used # in the definition of the burr pdf instead of np.exp # a direct implementation of the PDF as x**(-c-1) / (1+x**(-c))**(d+1) # also leads to an overflow error in the setup args = (1.89128135, 0.30195177) with suppress_warnings() as sup: # filter potential overflow warning sup.filter(RuntimeWarning) gen = FastGeneratorInversion(stats.burr(*args)) u_error, _ = gen.evaluate_error(random_state=4326) assert u_error <= 1e-10