# Author: Alexandre Gramfort # Fabian Pedregosa # # License: BSD 3 clause from math import log import numpy as np from scipy.linalg import pinvh import pytest from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_less from sklearn.utils._testing import assert_raise_message from sklearn.utils import check_random_state from sklearn.linear_model import BayesianRidge, ARDRegression from sklearn.linear_model import Ridge from sklearn import datasets from sklearn.utils.extmath import fast_logdet diabetes = datasets.load_diabetes() def test_n_iter(): """Check value of n_iter.""" X = np.array([[1], [2], [6], [8], [10]]) y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(n_iter=0) msg = "n_iter should be greater than or equal to 1." assert_raise_message(ValueError, msg, clf.fit, X, y) def test_bayesian_ridge_scores(): """Check scores attribute shape""" X, y = diabetes.data, diabetes.target clf = BayesianRidge(compute_score=True) clf.fit(X, y) assert clf.scores_.shape == (clf.n_iter_ + 1,) def test_bayesian_ridge_score_values(): """Check value of score on toy example. Compute log marginal likelihood with equation (36) in Sparse Bayesian Learning and the Relevance Vector Machine (Tipping, 2001): - 0.5 * (log |Id/alpha + X.X^T/lambda| + y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi)) + lambda_1 * log(lambda) - lambda_2 * lambda + alpha_1 * log(alpha) - alpha_2 * alpha and check equality with the score computed during training. """ X, y = diabetes.data, diabetes.target n_samples = X.shape[0] # check with initial values of alpha and lambda (see code for the values) eps = np.finfo(np.float64).eps alpha_ = 1. / (np.var(y) + eps) lambda_ = 1. # value of the parameters of the Gamma hyperpriors alpha_1 = 0.1 alpha_2 = 0.1 lambda_1 = 0.1 lambda_2 = 0.1 # compute score using formula of docstring score = lambda_1 * log(lambda_) - lambda_2 * lambda_ score += alpha_1 * log(alpha_) - alpha_2 * alpha_ M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T) M_inv = pinvh(M) score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) + n_samples * log(2 * np.pi)) # compute score with BayesianRidge clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2, lambda_1=lambda_1, lambda_2=lambda_2, n_iter=1, fit_intercept=False, compute_score=True) clf.fit(X, y) assert_almost_equal(clf.scores_[0], score, decimal=9) def test_bayesian_ridge_parameter(): # Test correctness of lambda_ and alpha_ parameters (GitHub issue #8224) X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]]) y = np.array([1, 2, 3, 2, 0, 4, 5]).T # A Ridge regression model using an alpha value equal to the ratio of # lambda_ and alpha_ from the Bayesian Ridge model must be identical br_model = BayesianRidge(compute_score=True).fit(X, y) rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(X, y) assert_array_almost_equal(rr_model.coef_, br_model.coef_) assert_almost_equal(rr_model.intercept_, br_model.intercept_) def test_bayesian_sample_weights(): # Test correctness of the sample_weights method X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]]) y = np.array([1, 2, 3, 2, 0, 4, 5]).T w = np.array([4, 3, 3, 1, 1, 2, 3]).T # A Ridge regression model using an alpha value equal to the ratio of # lambda_ and alpha_ from the Bayesian Ridge model must be identical br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w) rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit( X, y, sample_weight=w) assert_array_almost_equal(rr_model.coef_, br_model.coef_) assert_almost_equal(rr_model.intercept_, br_model.intercept_) def test_toy_bayesian_ridge_object(): # Test BayesianRidge on toy X = np.array([[1], [2], [6], [8], [10]]) Y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(compute_score=True) clf.fit(X, Y) # Check that the model could approximately learn the identity function test = [[1], [3], [4]] assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2) def test_bayesian_initial_params(): # Test BayesianRidge with initial values (alpha_init, lambda_init) X = np.vander(np.linspace(0, 4, 5), 4) y = np.array([0., 1., 0., -1., 0.]) # y = (x^3 - 6x^2 + 8x) / 3 # In this case, starting from the default initial values will increase # the bias of the fitted curve. So, lambda_init should be small. reg = BayesianRidge(alpha_init=1., lambda_init=1e-3) # Check the R2 score nearly equals to one. r2 = reg.fit(X, y).score(X, y) assert_almost_equal(r2, 1.) def test_prediction_bayesian_ridge_ard_with_constant_input(): # Test BayesianRidge and ARDRegression predictions for edge case of # constant target vectors n_samples = 4 n_features = 5 random_state = check_random_state(42) constant_value = random_state.rand() X = random_state.random_sample((n_samples, n_features)) y = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype) expected = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype) for clf in [BayesianRidge(), ARDRegression()]: y_pred = clf.fit(X, y).predict(X) assert_array_almost_equal(y_pred, expected) def test_std_bayesian_ridge_ard_with_constant_input(): # Test BayesianRidge and ARDRegression standard dev. for edge case of # constant target vector # The standard dev. should be relatively small (< 0.01 is tested here) n_samples = 10 n_features = 5 random_state = check_random_state(42) constant_value = random_state.rand() X = random_state.random_sample((n_samples, n_features)) y = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype) expected_upper_boundary = 0.01 for clf in [BayesianRidge(), ARDRegression()]: _, y_std = clf.fit(X, y).predict(X, return_std=True) assert_array_less(y_std, expected_upper_boundary) def test_update_of_sigma_in_ard(): # Checks that `sigma_` is updated correctly after the last iteration # of the ARDRegression algorithm. See issue #10128. X = np.array([[1, 0], [0, 0]]) y = np.array([0, 0]) clf = ARDRegression(n_iter=1) clf.fit(X, y) # With the inputs above, ARDRegression prunes both of the two coefficients # in the first iteration. Hence, the expected shape of `sigma_` is (0, 0). assert clf.sigma_.shape == (0, 0) # Ensure that no error is thrown at prediction stage clf.predict(X, return_std=True) def test_toy_ard_object(): # Test BayesianRegression ARD classifier X = np.array([[1], [2], [3]]) Y = np.array([1, 2, 3]) clf = ARDRegression(compute_score=True) clf.fit(X, Y) # Check that the model could approximately learn the identity function test = [[1], [3], [4]] assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2) @pytest.mark.parametrize('seed', range(100)) @pytest.mark.parametrize('n_samples, n_features', ((10, 100), (100, 10))) def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features): # Check that ARD converges with reasonable accuracy on an easy problem # (Github issue #14055) X = np.random.RandomState(seed=seed).normal(size=(250, 3)) y = X[:, 1] regressor = ARDRegression() regressor.fit(X, y) abs_coef_error = np.abs(1 - regressor.coef_[1]) assert abs_coef_error < 1e-10 def test_return_std(): # Test return_std option for both Bayesian regressors def f(X): return np.dot(X, w) + b def f_noise(X, noise_mult): return f(X) + np.random.randn(X.shape[0]) * noise_mult d = 5 n_train = 50 n_test = 10 w = np.array([1.0, 0.0, 1.0, -1.0, 0.0]) b = 1.0 X = np.random.random((n_train, d)) X_test = np.random.random((n_test, d)) for decimal, noise_mult in enumerate([1, 0.1, 0.01]): y = f_noise(X, noise_mult) m1 = BayesianRidge() m1.fit(X, y) y_mean1, y_std1 = m1.predict(X_test, return_std=True) assert_array_almost_equal(y_std1, noise_mult, decimal=decimal) m2 = ARDRegression() m2.fit(X, y) y_mean2, y_std2 = m2.predict(X_test, return_std=True) assert_array_almost_equal(y_std2, noise_mult, decimal=decimal) @pytest.mark.parametrize('seed', range(10)) def test_update_sigma(seed): # make sure the two update_sigma() helpers are equivalent. The woodbury # formula is used when n_samples < n_features, and the other one is used # otherwise. rng = np.random.RandomState(seed) # set n_samples == n_features to avoid instability issues when inverting # the matrices. Using the woodbury formula would be unstable when # n_samples > n_features n_samples = n_features = 10 X = rng.randn(n_samples, n_features) alpha = 1 lmbda = np.arange(1, n_features + 1) keep_lambda = np.array([True] * n_features) reg = ARDRegression() sigma = reg._update_sigma(X, alpha, lmbda, keep_lambda) sigma_woodbury = reg._update_sigma_woodbury(X, alpha, lmbda, keep_lambda) np.testing.assert_allclose(sigma, sigma_woodbury) def test_ard_regression_predict_normalize_true(): """Check that we can predict with `normalize=True` and `return_std=True`. Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/18605 """ clf = ARDRegression(normalize=True) clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2]) clf.predict([[1, 1]], return_std=True)