""" Test functions for multivariate normal distributions. """ from __future__ import division, print_function, absolute_import import pickle from numpy.testing import (assert_allclose, assert_almost_equal, assert_array_almost_equal, assert_equal, assert_array_less, assert_) import pytest from pytest import raises as assert_raises from .test_continuous_basic import check_distribution_rvs import numpy import numpy as np import scipy.linalg from scipy.stats._multivariate import _PSD, _lnB, _cho_inv_batch from scipy.stats import multivariate_normal from scipy.stats import matrix_normal from scipy.stats import special_ortho_group, ortho_group from scipy.stats import random_correlation from scipy.stats import unitary_group from scipy.stats import dirichlet, beta from scipy.stats import wishart, multinomial, invwishart, chi2, invgamma from scipy.stats import norm, uniform from scipy.stats import ks_2samp, kstest from scipy.stats import binom from scipy.integrate import romb from scipy.special import multigammaln from .common_tests import check_random_state_property class TestMultivariateNormal(object): def test_input_shape(self): mu = np.arange(3) cov = np.identity(2) assert_raises(ValueError, multivariate_normal.pdf, (0, 1), mu, cov) assert_raises(ValueError, multivariate_normal.pdf, (0, 1, 2), mu, cov) assert_raises(ValueError, multivariate_normal.cdf, (0, 1), mu, cov) assert_raises(ValueError, multivariate_normal.cdf, (0, 1, 2), mu, cov) def test_scalar_values(self): np.random.seed(1234) # When evaluated on scalar data, the pdf should return a scalar x, mean, cov = 1.5, 1.7, 2.5 pdf = multivariate_normal.pdf(x, mean, cov) assert_equal(pdf.ndim, 0) # When evaluated on a single vector, the pdf should return a scalar x = np.random.randn(5) mean = np.random.randn(5) cov = np.abs(np.random.randn(5)) # Diagonal values for cov. matrix pdf = multivariate_normal.pdf(x, mean, cov) assert_equal(pdf.ndim, 0) # When evaluated on scalar data, the cdf should return a scalar x, mean, cov = 1.5, 1.7, 2.5 cdf = multivariate_normal.cdf(x, mean, cov) assert_equal(cdf.ndim, 0) # When evaluated on a single vector, the cdf should return a scalar x = np.random.randn(5) mean = np.random.randn(5) cov = np.abs(np.random.randn(5)) # Diagonal values for cov. matrix cdf = multivariate_normal.cdf(x, mean, cov) assert_equal(cdf.ndim, 0) def test_logpdf(self): # Check that the log of the pdf is in fact the logpdf np.random.seed(1234) x = np.random.randn(5) mean = np.random.randn(5) cov = np.abs(np.random.randn(5)) d1 = multivariate_normal.logpdf(x, mean, cov) d2 = multivariate_normal.pdf(x, mean, cov) assert_allclose(d1, np.log(d2)) def test_logpdf_default_values(self): # Check that the log of the pdf is in fact the logpdf # with default parameters Mean=None and cov = 1 np.random.seed(1234) x = np.random.randn(5) d1 = multivariate_normal.logpdf(x) d2 = multivariate_normal.pdf(x) # check whether default values are being used d3 = multivariate_normal.logpdf(x, None, 1) d4 = multivariate_normal.pdf(x, None, 1) assert_allclose(d1, np.log(d2)) assert_allclose(d3, np.log(d4)) def test_logcdf(self): # Check that the log of the cdf is in fact the logcdf np.random.seed(1234) x = np.random.randn(5) mean = np.random.randn(5) cov = np.abs(np.random.randn(5)) d1 = multivariate_normal.logcdf(x, mean, cov) d2 = multivariate_normal.cdf(x, mean, cov) assert_allclose(d1, np.log(d2)) def test_logcdf_default_values(self): # Check that the log of the cdf is in fact the logcdf # with default parameters Mean=None and cov = 1 np.random.seed(1234) x = np.random.randn(5) d1 = multivariate_normal.logcdf(x) d2 = multivariate_normal.cdf(x) # check whether default values are being used d3 = multivariate_normal.logcdf(x, None, 1) d4 = multivariate_normal.cdf(x, None, 1) assert_allclose(d1, np.log(d2)) assert_allclose(d3, np.log(d4)) def test_rank(self): # Check that the rank is detected correctly. np.random.seed(1234) n = 4 mean = np.random.randn(n) for expected_rank in range(1, n + 1): s = np.random.randn(n, expected_rank) cov = np.dot(s, s.T) distn = multivariate_normal(mean, cov, allow_singular=True) assert_equal(distn.cov_info.rank, expected_rank) def test_degenerate_distributions(self): def _sample_orthonormal_matrix(n): M = np.random.randn(n, n) u, s, v = scipy.linalg.svd(M) return u for n in range(1, 5): x = np.random.randn(n) for k in range(1, n + 1): # Sample a small covariance matrix. s = np.random.randn(k, k) cov_kk = np.dot(s, s.T) # Embed the small covariance matrix into a larger low rank matrix. cov_nn = np.zeros((n, n)) cov_nn[:k, :k] = cov_kk # Define a rotation of the larger low rank matrix. u = _sample_orthonormal_matrix(n) cov_rr = np.dot(u, np.dot(cov_nn, u.T)) y = np.dot(u, x) # Check some identities. distn_kk = multivariate_normal(np.zeros(k), cov_kk, allow_singular=True) distn_nn = multivariate_normal(np.zeros(n), cov_nn, allow_singular=True) distn_rr = multivariate_normal(np.zeros(n), cov_rr, allow_singular=True) assert_equal(distn_kk.cov_info.rank, k) assert_equal(distn_nn.cov_info.rank, k) assert_equal(distn_rr.cov_info.rank, k) pdf_kk = distn_kk.pdf(x[:k]) pdf_nn = distn_nn.pdf(x) pdf_rr = distn_rr.pdf(y) assert_allclose(pdf_kk, pdf_nn) assert_allclose(pdf_kk, pdf_rr) logpdf_kk = distn_kk.logpdf(x[:k]) logpdf_nn = distn_nn.logpdf(x) logpdf_rr = distn_rr.logpdf(y) assert_allclose(logpdf_kk, logpdf_nn) assert_allclose(logpdf_kk, logpdf_rr) def test_large_pseudo_determinant(self): # Check that large pseudo-determinants are handled appropriately. # Construct a singular diagonal covariance matrix # whose pseudo determinant overflows double precision. large_total_log = 1000.0 npos = 100 nzero = 2 large_entry = np.exp(large_total_log / npos) n = npos + nzero cov = np.zeros((n, n), dtype=float) np.fill_diagonal(cov, large_entry) cov[-nzero:, -nzero:] = 0 # Check some determinants. assert_equal(scipy.linalg.det(cov), 0) assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf) assert_allclose(np.linalg.slogdet(cov[:npos, :npos]), (1, large_total_log)) # Check the pseudo-determinant. psd = _PSD(cov) assert_allclose(psd.log_pdet, large_total_log) def test_broadcasting(self): np.random.seed(1234) n = 4 # Construct a random covariance matrix. data = np.random.randn(n, n) cov = np.dot(data, data.T) mean = np.random.randn(n) # Construct an ndarray which can be interpreted as # a 2x3 array whose elements are random data vectors. X = np.random.randn(2, 3, n) # Check that multiple data points can be evaluated at once. desired_pdf = multivariate_normal.pdf(X, mean, cov) desired_cdf = multivariate_normal.cdf(X, mean, cov) for i in range(2): for j in range(3): actual = multivariate_normal.pdf(X[i, j], mean, cov) assert_allclose(actual, desired_pdf[i,j]) # Repeat for cdf actual = multivariate_normal.cdf(X[i, j], mean, cov) assert_allclose(actual, desired_cdf[i,j], rtol=1e-3) def test_normal_1D(self): # The probability density function for a 1D normal variable should # agree with the standard normal distribution in scipy.stats.distributions x = np.linspace(0, 2, 10) mean, cov = 1.2, 0.9 scale = cov**0.5 d1 = norm.pdf(x, mean, scale) d2 = multivariate_normal.pdf(x, mean, cov) assert_allclose(d1, d2) # The same should hold for the cumulative distribution function d1 = norm.cdf(x, mean, scale) d2 = multivariate_normal.cdf(x, mean, cov) assert_allclose(d1, d2) def test_marginalization(self): # Integrating out one of the variables of a 2D Gaussian should # yield a 1D Gaussian mean = np.array([2.5, 3.5]) cov = np.array([[.5, 0.2], [0.2, .6]]) n = 2 ** 8 + 1 # Number of samples delta = 6 / (n - 1) # Grid spacing v = np.linspace(0, 6, n) xv, yv = np.meshgrid(v, v) pos = np.empty((n, n, 2)) pos[:, :, 0] = xv pos[:, :, 1] = yv pdf = multivariate_normal.pdf(pos, mean, cov) # Marginalize over x and y axis margin_x = romb(pdf, delta, axis=0) margin_y = romb(pdf, delta, axis=1) # Compare with standard normal distribution gauss_x = norm.pdf(v, loc=mean[0], scale=cov[0, 0] ** 0.5) gauss_y = norm.pdf(v, loc=mean[1], scale=cov[1, 1] ** 0.5) assert_allclose(margin_x, gauss_x, rtol=1e-2, atol=1e-2) assert_allclose(margin_y, gauss_y, rtol=1e-2, atol=1e-2) def test_frozen(self): # The frozen distribution should agree with the regular one np.random.seed(1234) x = np.random.randn(5) mean = np.random.randn(5) cov = np.abs(np.random.randn(5)) norm_frozen = multivariate_normal(mean, cov) assert_allclose(norm_frozen.pdf(x), multivariate_normal.pdf(x, mean, cov)) assert_allclose(norm_frozen.logpdf(x), multivariate_normal.logpdf(x, mean, cov)) assert_allclose(norm_frozen.cdf(x), multivariate_normal.cdf(x, mean, cov)) assert_allclose(norm_frozen.logcdf(x), multivariate_normal.logcdf(x, mean, cov)) def test_pseudodet_pinv(self): # Make sure that pseudo-inverse and pseudo-det agree on cutoff # Assemble random covariance matrix with large and small eigenvalues np.random.seed(1234) n = 7 x = np.random.randn(n, n) cov = np.dot(x, x.T) s, u = scipy.linalg.eigh(cov) s = np.full(n, 0.5) s[0] = 1.0 s[-1] = 1e-7 cov = np.dot(u, np.dot(np.diag(s), u.T)) # Set cond so that the lowest eigenvalue is below the cutoff cond = 1e-5 psd = _PSD(cov, cond=cond) psd_pinv = _PSD(psd.pinv, cond=cond) # Check that the log pseudo-determinant agrees with the sum # of the logs of all but the smallest eigenvalue assert_allclose(psd.log_pdet, np.sum(np.log(s[:-1]))) # Check that the pseudo-determinant of the pseudo-inverse # agrees with 1 / pseudo-determinant assert_allclose(-psd.log_pdet, psd_pinv.log_pdet) def test_exception_nonsquare_cov(self): cov = [[1, 2, 3], [4, 5, 6]] assert_raises(ValueError, _PSD, cov) def test_exception_nonfinite_cov(self): cov_nan = [[1, 0], [0, np.nan]] assert_raises(ValueError, _PSD, cov_nan) cov_inf = [[1, 0], [0, np.inf]] assert_raises(ValueError, _PSD, cov_inf) def test_exception_non_psd_cov(self): cov = [[1, 0], [0, -1]] assert_raises(ValueError, _PSD, cov) def test_exception_singular_cov(self): np.random.seed(1234) x = np.random.randn(5) mean = np.random.randn(5) cov = np.ones((5, 5)) e = np.linalg.LinAlgError assert_raises(e, multivariate_normal, mean, cov) assert_raises(e, multivariate_normal.pdf, x, mean, cov) assert_raises(e, multivariate_normal.logpdf, x, mean, cov) assert_raises(e, multivariate_normal.cdf, x, mean, cov) assert_raises(e, multivariate_normal.logcdf, x, mean, cov) def test_R_values(self): # Compare the multivariate pdf with some values precomputed # in R version 3.0.1 (2013-05-16) on Mac OS X 10.6. # The values below were generated by the following R-script: # > library(mnormt) # > x <- seq(0, 2, length=5) # > y <- 3*x - 2 # > z <- x + cos(y) # > mu <- c(1, 3, 2) # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3) # > r_pdf <- dmnorm(cbind(x,y,z), mu, Sigma) r_pdf = np.array([0.0002214706, 0.0013819953, 0.0049138692, 0.0103803050, 0.0140250800]) x = np.linspace(0, 2, 5) y = 3 * x - 2 z = x + np.cos(y) r = np.array([x, y, z]).T mean = np.array([1, 3, 2], 'd') cov = np.array([[1, 2, 0], [2, 5, .5], [0, .5, 3]], 'd') pdf = multivariate_normal.pdf(r, mean, cov) assert_allclose(pdf, r_pdf, atol=1e-10) # Compare the multivariate cdf with some values precomputed # in R version 3.3.2 (2016-10-31) on Debian GNU/Linux. # The values below were generated by the following R-script: # > library(mnormt) # > x <- seq(0, 2, length=5) # > y <- 3*x - 2 # > z <- x + cos(y) # > mu <- c(1, 3, 2) # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3) # > r_cdf <- pmnorm(cbind(x,y,z), mu, Sigma) r_cdf = np.array([0.0017866215, 0.0267142892, 0.0857098761, 0.1063242573, 0.2501068509]) cdf = multivariate_normal.cdf(r, mean, cov) assert_allclose(cdf, r_cdf, atol=1e-5) # Also test bivariate cdf with some values precomputed # in R version 3.3.2 (2016-10-31) on Debian GNU/Linux. # The values below were generated by the following R-script: # > library(mnormt) # > x <- seq(0, 2, length=5) # > y <- 3*x - 2 # > mu <- c(1, 3) # > Sigma <- matrix(c(1,2,2,5), 2, 2) # > r_cdf2 <- pmnorm(cbind(x,y), mu, Sigma) r_cdf2 = np.array([0.01262147, 0.05838989, 0.18389571, 0.40696599, 0.66470577]) r2 = np.array([x, y]).T mean2 = np.array([1, 3], 'd') cov2 = np.array([[1, 2], [2, 5]], 'd') cdf2 = multivariate_normal.cdf(r2, mean2, cov2) assert_allclose(cdf2, r_cdf2, atol=1e-5) def test_multivariate_normal_rvs_zero_covariance(self): mean = np.zeros(2) covariance = np.zeros((2, 2)) model = multivariate_normal(mean, covariance, allow_singular=True) sample = model.rvs() assert_equal(sample, [0, 0]) def test_rvs_shape(self): # Check that rvs parses the mean and covariance correctly, and returns # an array of the right shape N = 300 d = 4 sample = multivariate_normal.rvs(mean=np.zeros(d), cov=1, size=N) assert_equal(sample.shape, (N, d)) sample = multivariate_normal.rvs(mean=None, cov=np.array([[2, .1], [.1, 1]]), size=N) assert_equal(sample.shape, (N, 2)) u = multivariate_normal(mean=0, cov=1) sample = u.rvs(N) assert_equal(sample.shape, (N, )) def test_large_sample(self): # Generate large sample and compare sample mean and sample covariance # with mean and covariance matrix. np.random.seed(2846) n = 3 mean = np.random.randn(n) M = np.random.randn(n, n) cov = np.dot(M, M.T) size = 5000 sample = multivariate_normal.rvs(mean, cov, size) assert_allclose(numpy.cov(sample.T), cov, rtol=1e-1) assert_allclose(sample.mean(0), mean, rtol=1e-1) def test_entropy(self): np.random.seed(2846) n = 3 mean = np.random.randn(n) M = np.random.randn(n, n) cov = np.dot(M, M.T) rv = multivariate_normal(mean, cov) # Check that frozen distribution agrees with entropy function assert_almost_equal(rv.entropy(), multivariate_normal.entropy(mean, cov)) # Compare entropy with manually computed expression involving # the sum of the logs of the eigenvalues of the covariance matrix eigs = np.linalg.eig(cov)[0] desired = 1 / 2 * (n * (np.log(2 * np.pi) + 1) + np.sum(np.log(eigs))) assert_almost_equal(desired, rv.entropy()) def test_lnB(self): alpha = np.array([1, 1, 1]) desired = .5 # e^lnB = 1/2 for [1, 1, 1] assert_almost_equal(np.exp(_lnB(alpha)), desired) class TestMatrixNormal(object): def test_bad_input(self): # Check that bad inputs raise errors num_rows = 4 num_cols = 3 M = np.full((num_rows,num_cols), 0.3) U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) # Incorrect dimensions assert_raises(ValueError, matrix_normal, np.zeros((5,4,3))) assert_raises(ValueError, matrix_normal, M, np.zeros(10), V) assert_raises(ValueError, matrix_normal, M, U, np.zeros(10)) assert_raises(ValueError, matrix_normal, M, U, U) assert_raises(ValueError, matrix_normal, M, V, V) assert_raises(ValueError, matrix_normal, M.T, U, V) # Singular covariance e = np.linalg.LinAlgError assert_raises(e, matrix_normal, M, U, np.ones((num_cols, num_cols))) assert_raises(e, matrix_normal, M, np.ones((num_rows, num_rows)), V) def test_default_inputs(self): # Check that default argument handling works num_rows = 4 num_cols = 3 M = np.full((num_rows,num_cols), 0.3) U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) Z = np.zeros((num_rows, num_cols)) Zr = np.zeros((num_rows, 1)) Zc = np.zeros((1, num_cols)) Ir = np.identity(num_rows) Ic = np.identity(num_cols) I1 = np.identity(1) assert_equal(matrix_normal.rvs(mean=M, rowcov=U, colcov=V).shape, (num_rows, num_cols)) assert_equal(matrix_normal.rvs(mean=M).shape, (num_rows, num_cols)) assert_equal(matrix_normal.rvs(rowcov=U).shape, (num_rows, 1)) assert_equal(matrix_normal.rvs(colcov=V).shape, (1, num_cols)) assert_equal(matrix_normal.rvs(mean=M, colcov=V).shape, (num_rows, num_cols)) assert_equal(matrix_normal.rvs(mean=M, rowcov=U).shape, (num_rows, num_cols)) assert_equal(matrix_normal.rvs(rowcov=U, colcov=V).shape, (num_rows, num_cols)) assert_equal(matrix_normal(mean=M).rowcov, Ir) assert_equal(matrix_normal(mean=M).colcov, Ic) assert_equal(matrix_normal(rowcov=U).mean, Zr) assert_equal(matrix_normal(rowcov=U).colcov, I1) assert_equal(matrix_normal(colcov=V).mean, Zc) assert_equal(matrix_normal(colcov=V).rowcov, I1) assert_equal(matrix_normal(mean=M, rowcov=U).colcov, Ic) assert_equal(matrix_normal(mean=M, colcov=V).rowcov, Ir) assert_equal(matrix_normal(rowcov=U, colcov=V).mean, Z) def test_covariance_expansion(self): # Check that covariance can be specified with scalar or vector num_rows = 4 num_cols = 3 M = np.full((num_rows, num_cols), 0.3) Uv = np.full(num_rows, 0.2) Us = 0.2 Vv = np.full(num_cols, 0.1) Vs = 0.1 Ir = np.identity(num_rows) Ic = np.identity(num_cols) assert_equal(matrix_normal(mean=M, rowcov=Uv, colcov=Vv).rowcov, 0.2*Ir) assert_equal(matrix_normal(mean=M, rowcov=Uv, colcov=Vv).colcov, 0.1*Ic) assert_equal(matrix_normal(mean=M, rowcov=Us, colcov=Vs).rowcov, 0.2*Ir) assert_equal(matrix_normal(mean=M, rowcov=Us, colcov=Vs).colcov, 0.1*Ic) def test_frozen_matrix_normal(self): for i in range(1,5): for j in range(1,5): M = np.full((i,j), 0.3) U = 0.5 * np.identity(i) + np.full((i,i), 0.5) V = 0.7 * np.identity(j) + np.full((j,j), 0.3) frozen = matrix_normal(mean=M, rowcov=U, colcov=V) rvs1 = frozen.rvs(random_state=1234) rvs2 = matrix_normal.rvs(mean=M, rowcov=U, colcov=V, random_state=1234) assert_equal(rvs1, rvs2) X = frozen.rvs(random_state=1234) pdf1 = frozen.pdf(X) pdf2 = matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V) assert_equal(pdf1, pdf2) logpdf1 = frozen.logpdf(X) logpdf2 = matrix_normal.logpdf(X, mean=M, rowcov=U, colcov=V) assert_equal(logpdf1, logpdf2) def test_matches_multivariate(self): # Check that the pdfs match those obtained by vectorising and # treating as a multivariate normal. for i in range(1,5): for j in range(1,5): M = np.full((i,j), 0.3) U = 0.5 * np.identity(i) + np.full((i,i), 0.5) V = 0.7 * np.identity(j) + np.full((j,j), 0.3) frozen = matrix_normal(mean=M, rowcov=U, colcov=V) X = frozen.rvs(random_state=1234) pdf1 = frozen.pdf(X) logpdf1 = frozen.logpdf(X) vecX = X.T.flatten() vecM = M.T.flatten() cov = np.kron(V,U) pdf2 = multivariate_normal.pdf(vecX, mean=vecM, cov=cov) logpdf2 = multivariate_normal.logpdf(vecX, mean=vecM, cov=cov) assert_allclose(pdf1, pdf2, rtol=1E-10) assert_allclose(logpdf1, logpdf2, rtol=1E-10) def test_array_input(self): # Check array of inputs has the same output as the separate entries. num_rows = 4 num_cols = 3 M = np.full((num_rows,num_cols), 0.3) U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) N = 10 frozen = matrix_normal(mean=M, rowcov=U, colcov=V) X1 = frozen.rvs(size=N, random_state=1234) X2 = frozen.rvs(size=N, random_state=4321) X = np.concatenate((X1[np.newaxis,:,:,:],X2[np.newaxis,:,:,:]), axis=0) assert_equal(X.shape, (2, N, num_rows, num_cols)) array_logpdf = frozen.logpdf(X) assert_equal(array_logpdf.shape, (2, N)) for i in range(2): for j in range(N): separate_logpdf = matrix_normal.logpdf(X[i,j], mean=M, rowcov=U, colcov=V) assert_allclose(separate_logpdf, array_logpdf[i,j], 1E-10) def test_moments(self): # Check that the sample moments match the parameters num_rows = 4 num_cols = 3 M = np.full((num_rows,num_cols), 0.3) U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5) V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3) N = 1000 frozen = matrix_normal(mean=M, rowcov=U, colcov=V) X = frozen.rvs(size=N, random_state=1234) sample_mean = np.mean(X,axis=0) assert_allclose(sample_mean, M, atol=0.1) sample_colcov = np.cov(X.reshape(N*num_rows,num_cols).T) assert_allclose(sample_colcov, V, atol=0.1) sample_rowcov = np.cov(np.swapaxes(X,1,2).reshape( N*num_cols,num_rows).T) assert_allclose(sample_rowcov, U, atol=0.1) class TestDirichlet(object): def test_frozen_dirichlet(self): np.random.seed(2846) n = np.random.randint(1, 32) alpha = np.random.uniform(10e-10, 100, n) d = dirichlet(alpha) assert_equal(d.var(), dirichlet.var(alpha)) assert_equal(d.mean(), dirichlet.mean(alpha)) assert_equal(d.entropy(), dirichlet.entropy(alpha)) num_tests = 10 for i in range(num_tests): x = np.random.uniform(10e-10, 100, n) x /= np.sum(x) assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha)) assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha)) def test_numpy_rvs_shape_compatibility(self): np.random.seed(2846) alpha = np.array([1.0, 2.0, 3.0]) x = np.random.dirichlet(alpha, size=7) assert_equal(x.shape, (7, 3)) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) dirichlet.pdf(x.T, alpha) dirichlet.pdf(x.T[:-1], alpha) dirichlet.logpdf(x.T, alpha) dirichlet.logpdf(x.T[:-1], alpha) def test_alpha_with_zeros(self): np.random.seed(2846) alpha = [1.0, 0.0, 3.0] # don't pass invalid alpha to np.random.dirichlet x = np.random.dirichlet(np.maximum(1e-9, alpha), size=7).T assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_alpha_with_negative_entries(self): np.random.seed(2846) alpha = [1.0, -2.0, 3.0] # don't pass invalid alpha to np.random.dirichlet x = np.random.dirichlet(np.maximum(1e-9, alpha), size=7).T assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_data_with_zeros(self): alpha = np.array([1.0, 2.0, 3.0, 4.0]) x = np.array([0.1, 0.0, 0.2, 0.7]) dirichlet.pdf(x, alpha) dirichlet.logpdf(x, alpha) alpha = np.array([1.0, 1.0, 1.0, 1.0]) assert_almost_equal(dirichlet.pdf(x, alpha), 6) assert_almost_equal(dirichlet.logpdf(x, alpha), np.log(6)) def test_data_with_zeros_and_small_alpha(self): alpha = np.array([1.0, 0.5, 3.0, 4.0]) x = np.array([0.1, 0.0, 0.2, 0.7]) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_data_with_negative_entries(self): alpha = np.array([1.0, 2.0, 3.0, 4.0]) x = np.array([0.1, -0.1, 0.3, 0.7]) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_data_with_too_large_entries(self): alpha = np.array([1.0, 2.0, 3.0, 4.0]) x = np.array([0.1, 1.1, 0.3, 0.7]) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_data_too_deep_c(self): alpha = np.array([1.0, 2.0, 3.0]) x = np.full((2, 7, 7), 1 / 14) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_alpha_too_deep(self): alpha = np.array([[1.0, 2.0], [3.0, 4.0]]) x = np.full((2, 2, 7), 1 / 4) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_alpha_correct_depth(self): alpha = np.array([1.0, 2.0, 3.0]) x = np.full((3, 7), 1 / 3) dirichlet.pdf(x, alpha) dirichlet.logpdf(x, alpha) def test_non_simplex_data(self): alpha = np.array([1.0, 2.0, 3.0]) x = np.full((3, 7), 1 / 2) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_data_vector_too_short(self): alpha = np.array([1.0, 2.0, 3.0, 4.0]) x = np.full((2, 7), 1 / 2) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_data_vector_too_long(self): alpha = np.array([1.0, 2.0, 3.0, 4.0]) x = np.full((5, 7), 1 / 5) assert_raises(ValueError, dirichlet.pdf, x, alpha) assert_raises(ValueError, dirichlet.logpdf, x, alpha) def test_mean_and_var(self): alpha = np.array([1., 0.8, 0.2]) d = dirichlet(alpha) expected_var = [1. / 12., 0.08, 0.03] expected_mean = [0.5, 0.4, 0.1] assert_array_almost_equal(d.var(), expected_var) assert_array_almost_equal(d.mean(), expected_mean) def test_scalar_values(self): alpha = np.array([0.2]) d = dirichlet(alpha) # For alpha of length 1, mean and var should be scalar instead of array assert_equal(d.mean().ndim, 0) assert_equal(d.var().ndim, 0) assert_equal(d.pdf([1.]).ndim, 0) assert_equal(d.logpdf([1.]).ndim, 0) def test_K_and_K_minus_1_calls_equal(self): # Test that calls with K and K-1 entries yield the same results. np.random.seed(2846) n = np.random.randint(1, 32) alpha = np.random.uniform(10e-10, 100, n) d = dirichlet(alpha) num_tests = 10 for i in range(num_tests): x = np.random.uniform(10e-10, 100, n) x /= np.sum(x) assert_almost_equal(d.pdf(x[:-1]), d.pdf(x)) def test_multiple_entry_calls(self): # Test that calls with multiple x vectors as matrix work np.random.seed(2846) n = np.random.randint(1, 32) alpha = np.random.uniform(10e-10, 100, n) d = dirichlet(alpha) num_tests = 10 num_multiple = 5 xm = None for i in range(num_tests): for m in range(num_multiple): x = np.random.uniform(10e-10, 100, n) x /= np.sum(x) if xm is not None: xm = np.vstack((xm, x)) else: xm = x rm = d.pdf(xm.T) rs = None for xs in xm: r = d.pdf(xs) if rs is not None: rs = np.append(rs, r) else: rs = r assert_array_almost_equal(rm, rs) def test_2D_dirichlet_is_beta(self): np.random.seed(2846) alpha = np.random.uniform(10e-10, 100, 2) d = dirichlet(alpha) b = beta(alpha[0], alpha[1]) num_tests = 10 for i in range(num_tests): x = np.random.uniform(10e-10, 100, 2) x /= np.sum(x) assert_almost_equal(b.pdf(x), d.pdf([x])) assert_almost_equal(b.mean(), d.mean()[0]) assert_almost_equal(b.var(), d.var()[0]) def test_multivariate_normal_dimensions_mismatch(): # Regression test for GH #3493. Check that setting up a PDF with a mean of # length M and a covariance matrix of size (N, N), where M != N, raises a # ValueError with an informative error message. mu = np.array([0.0, 0.0]) sigma = np.array([[1.0]]) assert_raises(ValueError, multivariate_normal, mu, sigma) # A simple check that the right error message was passed along. Checking # that the entire message is there, word for word, would be somewhat # fragile, so we just check for the leading part. try: multivariate_normal(mu, sigma) except ValueError as e: msg = "Dimension mismatch" assert_equal(str(e)[:len(msg)], msg) class TestWishart(object): def test_scale_dimensions(self): # Test that we can call the Wishart with various scale dimensions # Test case: dim=1, scale=1 true_scale = np.array(1, ndmin=2) scales = [ 1, # scalar [1], # iterable np.array(1), # 0-dim np.r_[1], # 1-dim np.array(1, ndmin=2) # 2-dim ] for scale in scales: w = wishart(1, scale) assert_equal(w.scale, true_scale) assert_equal(w.scale.shape, true_scale.shape) # Test case: dim=2, scale=[[1,0] # [0,2] true_scale = np.array([[1,0], [0,2]]) scales = [ [1,2], # iterable np.r_[1,2], # 1-dim np.array([[1,0], # 2-dim [0,2]]) ] for scale in scales: w = wishart(2, scale) assert_equal(w.scale, true_scale) assert_equal(w.scale.shape, true_scale.shape) # We cannot call with a df < dim assert_raises(ValueError, wishart, 1, np.eye(2)) # We cannot call with a 3-dimension array scale = np.array(1, ndmin=3) assert_raises(ValueError, wishart, 1, scale) def test_quantile_dimensions(self): # Test that we can call the Wishart rvs with various quantile dimensions # If dim == 1, consider x.shape = [1,1,1] X = [ 1, # scalar [1], # iterable np.array(1), # 0-dim np.r_[1], # 1-dim np.array(1, ndmin=2), # 2-dim np.array([1], ndmin=3) # 3-dim ] w = wishart(1,1) density = w.pdf(np.array(1, ndmin=3)) for x in X: assert_equal(w.pdf(x), density) # If dim == 1, consider x.shape = [1,1,*] X = [ [1,2,3], # iterable np.r_[1,2,3], # 1-dim np.array([1,2,3], ndmin=3) # 3-dim ] w = wishart(1,1) density = w.pdf(np.array([1,2,3], ndmin=3)) for x in X: assert_equal(w.pdf(x), density) # If dim == 2, consider x.shape = [2,2,1] # where x[:,:,*] = np.eye(1)*2 X = [ 2, # scalar [2,2], # iterable np.array(2), # 0-dim np.r_[2,2], # 1-dim np.array([[2,0], [0,2]]), # 2-dim np.array([[2,0], [0,2]])[:,:,np.newaxis] # 3-dim ] w = wishart(2,np.eye(2)) density = w.pdf(np.array([[2,0], [0,2]])[:,:,np.newaxis]) for x in X: assert_equal(w.pdf(x), density) def test_frozen(self): # Test that the frozen and non-frozen Wishart gives the same answers # Construct an arbitrary positive definite scale matrix dim = 4 scale = np.diag(np.arange(dim)+1) scale[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim-1) // 2) scale = np.dot(scale.T, scale) # Construct a collection of positive definite matrices to test the PDF X = [] for i in range(5): x = np.diag(np.arange(dim)+(i+1)**2) x[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim-1) // 2) x = np.dot(x.T, x) X.append(x) X = np.array(X).T # Construct a 1D and 2D set of parameters parameters = [ (10, 1, np.linspace(0.1, 10, 5)), # 1D case (10, scale, X) ] for (df, scale, x) in parameters: w = wishart(df, scale) assert_equal(w.var(), wishart.var(df, scale)) assert_equal(w.mean(), wishart.mean(df, scale)) assert_equal(w.mode(), wishart.mode(df, scale)) assert_equal(w.entropy(), wishart.entropy(df, scale)) assert_equal(w.pdf(x), wishart.pdf(x, df, scale)) def test_1D_is_chisquared(self): # The 1-dimensional Wishart with an identity scale matrix is just a # chi-squared distribution. # Test variance, mean, entropy, pdf # Kolgomorov-Smirnov test for rvs np.random.seed(482974) sn = 500 dim = 1 scale = np.eye(dim) df_range = np.arange(1, 10, 2, dtype=float) X = np.linspace(0.1,10,num=10) for df in df_range: w = wishart(df, scale) c = chi2(df) # Statistics assert_allclose(w.var(), c.var()) assert_allclose(w.mean(), c.mean()) assert_allclose(w.entropy(), c.entropy()) # PDF assert_allclose(w.pdf(X), c.pdf(X)) # rvs rvs = w.rvs(size=sn) args = (df,) alpha = 0.01 check_distribution_rvs('chi2', args, alpha, rvs) def test_is_scaled_chisquared(self): # The 2-dimensional Wishart with an arbitrary scale matrix can be # transformed to a scaled chi-squared distribution. # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)` np.random.seed(482974) sn = 500 df = 10 dim = 4 # Construct an arbitrary positive definite matrix scale = np.diag(np.arange(4)+1) scale[np.tril_indices(4, k=-1)] = np.arange(6) scale = np.dot(scale.T, scale) # Use :math:`\lambda = [1, \dots, 1]'` lamda = np.ones((dim,1)) sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze() w = wishart(df, sigma_lamda) c = chi2(df, scale=sigma_lamda) # Statistics assert_allclose(w.var(), c.var()) assert_allclose(w.mean(), c.mean()) assert_allclose(w.entropy(), c.entropy()) # PDF X = np.linspace(0.1,10,num=10) assert_allclose(w.pdf(X), c.pdf(X)) # rvs rvs = w.rvs(size=sn) args = (df,0,sigma_lamda) alpha = 0.01 check_distribution_rvs('chi2', args, alpha, rvs) class TestMultinomial(object): def test_logpmf(self): vals1 = multinomial.logpmf((3,4), 7, (0.3, 0.7)) assert_allclose(vals1, -1.483270127243324, rtol=1e-8) vals2 = multinomial.logpmf([3, 4], 0, [.3, .7]) assert_allclose(vals2, np.NAN, rtol=1e-8) vals3 = multinomial.logpmf([3, 4], 0, [-2, 3]) assert_allclose(vals3, np.NAN, rtol=1e-8) def test_reduces_binomial(self): # test that the multinomial pmf reduces to the binomial pmf in the 2d # case val1 = multinomial.logpmf((3, 4), 7, (0.3, 0.7)) val2 = binom.logpmf(3, 7, 0.3) assert_allclose(val1, val2, rtol=1e-8) val1 = multinomial.pmf((6, 8), 14, (0.1, 0.9)) val2 = binom.pmf(6, 14, 0.1) assert_allclose(val1, val2, rtol=1e-8) def test_R(self): # test against the values produced by this R code # (https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Multinom.html) # X <- t(as.matrix(expand.grid(0:3, 0:3))); X <- X[, colSums(X) <= 3] # X <- rbind(X, 3:3 - colSums(X)); dimnames(X) <- list(letters[1:3], NULL) # X # apply(X, 2, function(x) dmultinom(x, prob = c(1,2,5))) n, p = 3, [1./8, 2./8, 5./8] r_vals = {(0, 0, 3): 0.244140625, (1, 0, 2): 0.146484375, (2, 0, 1): 0.029296875, (3, 0, 0): 0.001953125, (0, 1, 2): 0.292968750, (1, 1, 1): 0.117187500, (2, 1, 0): 0.011718750, (0, 2, 1): 0.117187500, (1, 2, 0): 0.023437500, (0, 3, 0): 0.015625000} for x in r_vals: assert_allclose(multinomial.pmf(x, n, p), r_vals[x], atol=1e-14) def test_rvs_np(self): # test that .rvs agrees w/numpy sc_rvs = multinomial.rvs(3, [1/4.]*3, size=7, random_state=123) rndm = np.random.RandomState(123) np_rvs = rndm.multinomial(3, [1/4.]*3, size=7) assert_equal(sc_rvs, np_rvs) def test_pmf(self): vals0 = multinomial.pmf((5,), 5, (1,)) assert_allclose(vals0, 1, rtol=1e-8) vals1 = multinomial.pmf((3,4), 7, (.3, .7)) assert_allclose(vals1, .22689449999999994, rtol=1e-8) vals2 = multinomial.pmf([[[3,5],[0,8]], [[-1, 9], [1, 1]]], 8, (.1, .9)) assert_allclose(vals2, [[.03306744, .43046721], [0, 0]], rtol=1e-8) x = np.empty((0,2), dtype=np.float64) vals3 = multinomial.pmf(x, 4, (.3, .7)) assert_equal(vals3, np.empty([], dtype=np.float64)) vals4 = multinomial.pmf([1,2], 4, (.3, .7)) assert_allclose(vals4, 0, rtol=1e-8) vals5 = multinomial.pmf([3, 3, 0], 6, [2/3.0, 1/3.0, 0]) assert_allclose(vals5, 0.219478737997, rtol=1e-8) def test_pmf_broadcasting(self): vals0 = multinomial.pmf([1, 2], 3, [[.1, .9], [.2, .8]]) assert_allclose(vals0, [.243, .384], rtol=1e-8) vals1 = multinomial.pmf([1, 2], [3, 4], [.1, .9]) assert_allclose(vals1, [.243, 0], rtol=1e-8) vals2 = multinomial.pmf([[[1, 2], [1, 1]]], 3, [.1, .9]) assert_allclose(vals2, [[.243, 0]], rtol=1e-8) vals3 = multinomial.pmf([1, 2], [[[3], [4]]], [.1, .9]) assert_allclose(vals3, [[[.243], [0]]], rtol=1e-8) vals4 = multinomial.pmf([[1, 2], [1,1]], [[[[3]]]], [.1, .9]) assert_allclose(vals4, [[[[.243, 0]]]], rtol=1e-8) def test_cov(self): cov1 = multinomial.cov(5, (.2, .3, .5)) cov2 = [[5*.2*.8, -5*.2*.3, -5*.2*.5], [-5*.3*.2, 5*.3*.7, -5*.3*.5], [-5*.5*.2, -5*.5*.3, 5*.5*.5]] assert_allclose(cov1, cov2, rtol=1e-8) def test_cov_broadcasting(self): cov1 = multinomial.cov(5, [[.1, .9], [.2, .8]]) cov2 = [[[.45, -.45],[-.45, .45]], [[.8, -.8], [-.8, .8]]] assert_allclose(cov1, cov2, rtol=1e-8) cov3 = multinomial.cov([4, 5], [.1, .9]) cov4 = [[[.36, -.36], [-.36, .36]], [[.45, -.45], [-.45, .45]]] assert_allclose(cov3, cov4, rtol=1e-8) cov5 = multinomial.cov([4, 5], [[.3, .7], [.4, .6]]) cov6 = [[[4*.3*.7, -4*.3*.7], [-4*.3*.7, 4*.3*.7]], [[5*.4*.6, -5*.4*.6], [-5*.4*.6, 5*.4*.6]]] assert_allclose(cov5, cov6, rtol=1e-8) def test_entropy(self): # this is equivalent to a binomial distribution with n=2, so the # entropy .77899774929 is easily computed "by hand" ent0 = multinomial.entropy(2, [.2, .8]) assert_allclose(ent0, binom.entropy(2, .2), rtol=1e-8) def test_entropy_broadcasting(self): ent0 = multinomial.entropy([2, 3], [.2, .3]) assert_allclose(ent0, [binom.entropy(2, .2), binom.entropy(3, .2)], rtol=1e-8) ent1 = multinomial.entropy([7, 8], [[.3, .7], [.4, .6]]) assert_allclose(ent1, [binom.entropy(7, .3), binom.entropy(8, .4)], rtol=1e-8) ent2 = multinomial.entropy([[7], [8]], [[.3, .7], [.4, .6]]) assert_allclose(ent2, [[binom.entropy(7, .3), binom.entropy(7, .4)], [binom.entropy(8, .3), binom.entropy(8, .4)]], rtol=1e-8) def test_mean(self): mean1 = multinomial.mean(5, [.2, .8]) assert_allclose(mean1, [5*.2, 5*.8], rtol=1e-8) def test_mean_broadcasting(self): mean1 = multinomial.mean([5, 6], [.2, .8]) assert_allclose(mean1, [[5*.2, 5*.8], [6*.2, 6*.8]], rtol=1e-8) def test_frozen(self): # The frozen distribution should agree with the regular one np.random.seed(1234) n = 12 pvals = (.1, .2, .3, .4) x = [[0,0,0,12],[0,0,1,11],[0,1,1,10],[1,1,1,9],[1,1,2,8]] x = np.asarray(x, dtype=np.float64) mn_frozen = multinomial(n, pvals) assert_allclose(mn_frozen.pmf(x), multinomial.pmf(x, n, pvals)) assert_allclose(mn_frozen.logpmf(x), multinomial.logpmf(x, n, pvals)) assert_allclose(mn_frozen.entropy(), multinomial.entropy(n, pvals)) class TestInvwishart(object): def test_frozen(self): # Test that the frozen and non-frozen inverse Wishart gives the same # answers # Construct an arbitrary positive definite scale matrix dim = 4 scale = np.diag(np.arange(dim)+1) scale[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)/2) scale = np.dot(scale.T, scale) # Construct a collection of positive definite matrices to test the PDF X = [] for i in range(5): x = np.diag(np.arange(dim)+(i+1)**2) x[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)/2) x = np.dot(x.T, x) X.append(x) X = np.array(X).T # Construct a 1D and 2D set of parameters parameters = [ (10, 1, np.linspace(0.1, 10, 5)), # 1D case (10, scale, X) ] for (df, scale, x) in parameters: iw = invwishart(df, scale) assert_equal(iw.var(), invwishart.var(df, scale)) assert_equal(iw.mean(), invwishart.mean(df, scale)) assert_equal(iw.mode(), invwishart.mode(df, scale)) assert_allclose(iw.pdf(x), invwishart.pdf(x, df, scale)) def test_1D_is_invgamma(self): # The 1-dimensional inverse Wishart with an identity scale matrix is # just an inverse gamma distribution. # Test variance, mean, pdf # Kolgomorov-Smirnov test for rvs np.random.seed(482974) sn = 500 dim = 1 scale = np.eye(dim) df_range = np.arange(5, 20, 2, dtype=float) X = np.linspace(0.1,10,num=10) for df in df_range: iw = invwishart(df, scale) ig = invgamma(df/2, scale=1./2) # Statistics assert_allclose(iw.var(), ig.var()) assert_allclose(iw.mean(), ig.mean()) # PDF assert_allclose(iw.pdf(X), ig.pdf(X)) # rvs rvs = iw.rvs(size=sn) args = (df/2, 0, 1./2) alpha = 0.01 check_distribution_rvs('invgamma', args, alpha, rvs) def test_wishart_invwishart_2D_rvs(self): dim = 3 df = 10 # Construct a simple non-diagonal positive definite matrix scale = np.eye(dim) scale[0,1] = 0.5 scale[1,0] = 0.5 # Construct frozen Wishart and inverse Wishart random variables w = wishart(df, scale) iw = invwishart(df, scale) # Get the generated random variables from a known seed np.random.seed(248042) w_rvs = wishart.rvs(df, scale) np.random.seed(248042) frozen_w_rvs = w.rvs() np.random.seed(248042) iw_rvs = invwishart.rvs(df, scale) np.random.seed(248042) frozen_iw_rvs = iw.rvs() # Manually calculate what it should be, based on the Bartlett (1933) # decomposition of a Wishart into D A A' D', where D is the Cholesky # factorization of the scale matrix and A is the lower triangular matrix # with the square root of chi^2 variates on the diagonal and N(0,1) # variates in the lower triangle. np.random.seed(248042) covariances = np.random.normal(size=3) variances = np.r_[ np.random.chisquare(df), np.random.chisquare(df-1), np.random.chisquare(df-2), ]**0.5 # Construct the lower-triangular A matrix A = np.diag(variances) A[np.tril_indices(dim, k=-1)] = covariances # Wishart random variate D = np.linalg.cholesky(scale) DA = D.dot(A) manual_w_rvs = np.dot(DA, DA.T) # inverse Wishart random variate # Supposing that the inverse wishart has scale matrix `scale`, then the # random variate is the inverse of a random variate drawn from a Wishart # distribution with scale matrix `inv_scale = np.linalg.inv(scale)` iD = np.linalg.cholesky(np.linalg.inv(scale)) iDA = iD.dot(A) manual_iw_rvs = np.linalg.inv(np.dot(iDA, iDA.T)) # Test for equality assert_allclose(w_rvs, manual_w_rvs) assert_allclose(frozen_w_rvs, manual_w_rvs) assert_allclose(iw_rvs, manual_iw_rvs) assert_allclose(frozen_iw_rvs, manual_iw_rvs) def test_cho_inv_batch(self): """Regression test for gh-8844.""" a0 = np.array([[2, 1, 0, 0.5], [1, 2, 0.5, 0.5], [0, 0.5, 3, 1], [0.5, 0.5, 1, 2]]) a1 = np.array([[2, -1, 0, 0.5], [-1, 2, 0.5, 0.5], [0, 0.5, 3, 1], [0.5, 0.5, 1, 4]]) a = np.array([a0, a1]) ainv = a.copy() _cho_inv_batch(ainv) ident = np.eye(4) assert_allclose(a[0].dot(ainv[0]), ident, atol=1e-15) assert_allclose(a[1].dot(ainv[1]), ident, atol=1e-15) def test_logpdf_4x4(self): """Regression test for gh-8844.""" X = np.array([[2, 1, 0, 0.5], [1, 2, 0.5, 0.5], [0, 0.5, 3, 1], [0.5, 0.5, 1, 2]]) Psi = np.array([[9, 7, 3, 1], [7, 9, 5, 1], [3, 5, 8, 2], [1, 1, 2, 9]]) nu = 6 prob = invwishart.logpdf(X, nu, Psi) # Explicit calculation from the formula on wikipedia. p = X.shape[0] sig, logdetX = np.linalg.slogdet(X) sig, logdetPsi = np.linalg.slogdet(Psi) M = np.linalg.solve(X, Psi) expected = ((nu/2)*logdetPsi - (nu*p/2)*np.log(2) - multigammaln(nu/2, p) - (nu + p + 1)/2*logdetX - 0.5*M.trace()) assert_allclose(prob, expected) class TestSpecialOrthoGroup(object): def test_reproducibility(self): np.random.seed(514) x = special_ortho_group.rvs(3) expected = np.array([[-0.99394515, -0.04527879, 0.10011432], [0.04821555, -0.99846897, 0.02711042], [0.09873351, 0.03177334, 0.99460653]]) assert_array_almost_equal(x, expected) random_state = np.random.RandomState(seed=514) x = special_ortho_group.rvs(3, random_state=random_state) assert_array_almost_equal(x, expected) def test_invalid_dim(self): assert_raises(ValueError, special_ortho_group.rvs, None) assert_raises(ValueError, special_ortho_group.rvs, (2, 2)) assert_raises(ValueError, special_ortho_group.rvs, 1) assert_raises(ValueError, special_ortho_group.rvs, 2.5) def test_frozen_matrix(self): dim = 7 frozen = special_ortho_group(dim) rvs1 = frozen.rvs(random_state=1234) rvs2 = special_ortho_group.rvs(dim, random_state=1234) assert_equal(rvs1, rvs2) def test_det_and_ortho(self): xs = [special_ortho_group.rvs(dim) for dim in range(2,12) for i in range(3)] # Test that determinants are always +1 dets = [np.linalg.det(x) for x in xs] assert_allclose(dets, [1.]*30, rtol=1e-13) # Test that these are orthogonal matrices for x in xs: assert_array_almost_equal(np.dot(x, x.T), np.eye(x.shape[0])) def test_haar(self): # Test that the distribution is constant under rotation # Every column should have the same distribution # Additionally, the distribution should be invariant under another rotation # Generate samples dim = 5 samples = 1000 # Not too many, or the test takes too long ks_prob = .05 np.random.seed(514) xs = special_ortho_group.rvs(dim, size=samples) # Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3), # effectively picking off entries in the matrices of xs. # These projections should all have the same disribution, # establishing rotational invariance. We use the two-sided # KS test to confirm this. # We could instead test that angles between random vectors # are uniformly distributed, but the below is sufficient. # It is not feasible to consider all pairs, so pick a few. els = ((0,0), (0,2), (1,4), (2,3)) #proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els} proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els) pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1] ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs] assert_array_less([ks_prob]*len(pairs), ks_tests) class TestOrthoGroup(object): def test_reproducibility(self): np.random.seed(515) x = ortho_group.rvs(3) x2 = ortho_group.rvs(3, random_state=515) # Note this matrix has det -1, distinguishing O(N) from SO(N) assert_almost_equal(np.linalg.det(x), -1) expected = np.array([[0.94449759, -0.21678569, -0.24683651], [-0.13147569, -0.93800245, 0.3207266], [0.30106219, 0.27047251, 0.9144431]]) assert_array_almost_equal(x, expected) assert_array_almost_equal(x2, expected) def test_invalid_dim(self): assert_raises(ValueError, ortho_group.rvs, None) assert_raises(ValueError, ortho_group.rvs, (2, 2)) assert_raises(ValueError, ortho_group.rvs, 1) assert_raises(ValueError, ortho_group.rvs, 2.5) def test_det_and_ortho(self): xs = [[ortho_group.rvs(dim) for i in range(10)] for dim in range(2,12)] # Test that abs determinants are always +1 dets = np.array([[np.linalg.det(x) for x in xx] for xx in xs]) assert_allclose(np.fabs(dets), np.ones(dets.shape), rtol=1e-13) # Test that we get both positive and negative determinants # Check that we have at least one and less than 10 negative dets in a sample of 10. The rest are positive by the previous test. # Test each dimension separately assert_array_less([0]*10, [np.nonzero(d < 0)[0].shape[0] for d in dets]) assert_array_less([np.nonzero(d < 0)[0].shape[0] for d in dets], [10]*10) # Test that these are orthogonal matrices for xx in xs: for x in xx: assert_array_almost_equal(np.dot(x, x.T), np.eye(x.shape[0])) def test_haar(self): # Test that the distribution is constant under rotation # Every column should have the same distribution # Additionally, the distribution should be invariant under another rotation # Generate samples dim = 5 samples = 1000 # Not too many, or the test takes too long ks_prob = .05 np.random.seed(518) # Note that the test is sensitive to seed too xs = ortho_group.rvs(dim, size=samples) # Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3), # effectively picking off entries in the matrices of xs. # These projections should all have the same disribution, # establishing rotational invariance. We use the two-sided # KS test to confirm this. # We could instead test that angles between random vectors # are uniformly distributed, but the below is sufficient. # It is not feasible to consider all pairs, so pick a few. els = ((0,0), (0,2), (1,4), (2,3)) #proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els} proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els) pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1] ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs] assert_array_less([ks_prob]*len(pairs), ks_tests) @pytest.mark.slow def test_pairwise_distances(self): # Test that the distribution of pairwise distances is close to correct. np.random.seed(514) def random_ortho(dim): u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim))) return np.dot(u, v) for dim in range(2, 6): def generate_test_statistics(rvs, N=1000, eps=1e-10): stats = np.array([ np.sum((rvs(dim=dim) - rvs(dim=dim))**2) for _ in range(N) ]) # Add a bit of noise to account for numeric accuracy. stats += np.random.uniform(-eps, eps, size=stats.shape) return stats expected = generate_test_statistics(random_ortho) actual = generate_test_statistics(scipy.stats.ortho_group.rvs) _D, p = scipy.stats.ks_2samp(expected, actual) assert_array_less(.05, p) class TestRandomCorrelation(object): def test_reproducibility(self): np.random.seed(514) eigs = (.5, .8, 1.2, 1.5) x = random_correlation.rvs((.5, .8, 1.2, 1.5)) x2 = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=514) expected = np.array([[1., -0.20387311, 0.18366501, -0.04953711], [-0.20387311, 1., -0.24351129, 0.06703474], [0.18366501, -0.24351129, 1., 0.38530195], [-0.04953711, 0.06703474, 0.38530195, 1.]]) assert_array_almost_equal(x, expected) assert_array_almost_equal(x2, expected) def test_invalid_eigs(self): assert_raises(ValueError, random_correlation.rvs, None) assert_raises(ValueError, random_correlation.rvs, 'test') assert_raises(ValueError, random_correlation.rvs, 2.5) assert_raises(ValueError, random_correlation.rvs, [2.5]) assert_raises(ValueError, random_correlation.rvs, [[1,2],[3,4]]) assert_raises(ValueError, random_correlation.rvs, [2.5, -.5]) assert_raises(ValueError, random_correlation.rvs, [1, 2, .1]) def test_definition(self): # Test the definition of a correlation matrix in several dimensions: # # 1. Det is product of eigenvalues (and positive by construction # in examples) # 2. 1's on diagonal # 3. Matrix is symmetric def norm(i, e): return i*e/sum(e) np.random.seed(123) eigs = [norm(i, np.random.uniform(size=i)) for i in range(2, 6)] eigs.append([4,0,0,0]) ones = [[1.]*len(e) for e in eigs] xs = [random_correlation.rvs(e) for e in eigs] # Test that determinants are products of eigenvalues # These are positive by construction # Could also test that the eigenvalues themselves are correct, # but this seems sufficient. dets = [np.fabs(np.linalg.det(x)) for x in xs] dets_known = [np.prod(e) for e in eigs] assert_allclose(dets, dets_known, rtol=1e-13, atol=1e-13) # Test for 1's on the diagonal diags = [np.diag(x) for x in xs] for a, b in zip(diags, ones): assert_allclose(a, b, rtol=1e-13) # Correlation matrices are symmetric for x in xs: assert_allclose(x, x.T, rtol=1e-13) def test_to_corr(self): # Check some corner cases in to_corr # ajj == 1 m = np.array([[0.1, 0], [0, 1]], dtype=float) m = random_correlation._to_corr(m) assert_allclose(m, np.array([[1, 0], [0, 0.1]])) # Floating point overflow; fails to compute the correct # rotation, but should still produce some valid rotation # rather than infs/nans with np.errstate(over='ignore'): g = np.array([[0, 1], [-1, 0]]) m0 = np.array([[1e300, 0], [0, np.nextafter(1, 0)]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m, g.T.dot(m0).dot(g)) m0 = np.array([[0.9, 1e300], [1e300, 1.1]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m, g.T.dot(m0).dot(g)) # Zero discriminant; should set the first diag entry to 1 m0 = np.array([[2, 1], [1, 2]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m[0,0], 1) # Slightly negative discriminant; should be approx correct still m0 = np.array([[2 + 1e-7, 1], [1, 2]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m[0,0], 1) class TestUnitaryGroup(object): def test_reproducibility(self): np.random.seed(514) x = unitary_group.rvs(3) x2 = unitary_group.rvs(3, random_state=514) expected = np.array([[0.308771+0.360312j, 0.044021+0.622082j, 0.160327+0.600173j], [0.732757+0.297107j, 0.076692-0.4614j, -0.394349+0.022613j], [-0.148844+0.357037j, -0.284602-0.557949j, 0.607051+0.299257j]]) assert_array_almost_equal(x, expected) assert_array_almost_equal(x2, expected) def test_invalid_dim(self): assert_raises(ValueError, unitary_group.rvs, None) assert_raises(ValueError, unitary_group.rvs, (2, 2)) assert_raises(ValueError, unitary_group.rvs, 1) assert_raises(ValueError, unitary_group.rvs, 2.5) def test_unitarity(self): xs = [unitary_group.rvs(dim) for dim in range(2,12) for i in range(3)] # Test that these are unitary matrices for x in xs: assert_allclose(np.dot(x, x.conj().T), np.eye(x.shape[0]), atol=1e-15) def test_haar(self): # Test that the eigenvalues, which lie on the unit circle in # the complex plane, are uncorrelated. # Generate samples dim = 5 samples = 1000 # Not too many, or the test takes too long np.random.seed(514) # Note that the test is sensitive to seed too xs = unitary_group.rvs(dim, size=samples) # The angles "x" of the eigenvalues should be uniformly distributed # Overall this seems to be a necessary but weak test of the distribution. eigs = np.vstack([scipy.linalg.eigvals(x) for x in xs]) x = np.arctan2(eigs.imag, eigs.real) res = kstest(x.ravel(), uniform(-np.pi, 2*np.pi).cdf) assert_(res.pvalue > 0.05) def check_pickling(distfn, args): # check that a distribution instance pickles and unpickles # pay special attention to the random_state property # save the random_state (restore later) rndm = distfn.random_state distfn.random_state = 1234 distfn.rvs(*args, size=8) s = pickle.dumps(distfn) r0 = distfn.rvs(*args, size=8) unpickled = pickle.loads(s) r1 = unpickled.rvs(*args, size=8) assert_equal(r0, r1) # restore the random_state distfn.random_state = rndm def test_random_state_property(): scale = np.eye(3) scale[0, 1] = 0.5 scale[1, 0] = 0.5 dists = [ [multivariate_normal, ()], [dirichlet, (np.array([1.]), )], [wishart, (10, scale)], [invwishart, (10, scale)], [multinomial, (5, [0.5, 0.4, 0.1])], [ortho_group, (2,)], [special_ortho_group, (2,)] ] for distfn, args in dists: check_random_state_property(distfn, args) check_pickling(distfn, args)