# Authors: Olivier Grisel
#          Alexandre Gramfort
# License: BSD 3 clause

import numpy as np
import pytest
from scipy import interpolate, sparse
from copy import deepcopy
import joblib

from sklearn.base import is_classifier
from sklearn.datasets import load_diabetes
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import ConvergenceWarning

from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import assert_raises_regex
from sklearn.utils._testing import assert_raise_message
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import assert_warns_message
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import TempMemmap
from sklearn.utils.fixes import parse_version

from sklearn.linear_model import (
    ARDRegression,
    BayesianRidge,
    ElasticNet,
    ElasticNetCV,
    enet_path,
    Lars,
    lars_path,
    Lasso,
    LassoCV,
    LassoLars,
    LassoLarsCV,
    LassoLarsIC,
    lasso_path,
    LinearRegression,
    MultiTaskElasticNet,
    MultiTaskElasticNetCV,
    MultiTaskLasso,
    MultiTaskLassoCV,
    OrthogonalMatchingPursuit,
    Ridge,
    RidgeClassifier,
    RidgeCV,
)

from sklearn.linear_model._coordinate_descent import _set_order
from sklearn.utils import check_array


@pytest.mark.parametrize('l1_ratio', (-1, 2, None, 10, 'something_wrong'))
def test_l1_ratio_param_invalid(l1_ratio):
    # Check that the correct error is raised when l1_ratio in ElasticNet
    # is outside the valid range
    X = np.array([[-1.], [0.], [1.]])
    Y = [-1, 0, 1]  # just a straight line

    msg = "l1_ratio must be between 0 and 1; got l1_ratio="
    clf = ElasticNet(alpha=0.1, l1_ratio=l1_ratio)
    with pytest.raises(ValueError, match=msg):
        clf.fit(X, Y)


@pytest.mark.parametrize('order', ['C', 'F'])
@pytest.mark.parametrize('input_order', ['C', 'F'])
def test_set_order_dense(order, input_order):
    """Check that _set_order returns arrays with the promised order."""
    X = np.array([[0], [0], [0]], order=input_order)
    y = np.array([0, 0, 0], order=input_order)
    X2, y2 = _set_order(X, y, order=order)
    if order == 'C':
        assert X2.flags['C_CONTIGUOUS']
        assert y2.flags['C_CONTIGUOUS']
    elif order == 'F':
        assert X2.flags['F_CONTIGUOUS']
        assert y2.flags['F_CONTIGUOUS']
    if order == input_order:
        assert X is X2
        assert y is y2


@pytest.mark.parametrize('order', ['C', 'F'])
@pytest.mark.parametrize('input_order', ['C', 'F'])
def test_set_order_sparse(order, input_order):
    """Check that _set_order returns sparse matrices in the promised
    format."""
    X = sparse.coo_matrix(np.array([[0], [0], [0]]))
    y = sparse.coo_matrix(np.array([0, 0, 0]))
    sparse_format = "csc" if input_order == "F" else "csr"
    X = X.asformat(sparse_format)
    y = y.asformat(sparse_format)
    X2, y2 = _set_order(X, y, order=order)
    if order == 'C':
        assert sparse.isspmatrix_csr(X2)
        assert sparse.isspmatrix_csr(y2)
    elif order == 'F':
        assert sparse.isspmatrix_csc(X2)
        assert sparse.isspmatrix_csc(y2)


def test_lasso_zero():
    # Check that the lasso can handle zero data without crashing
    X = [[0], [0], [0]]
    y = [0, 0, 0]
    clf = Lasso(alpha=0.1).fit(X, y)
    pred = clf.predict([[1], [2], [3]])
    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(pred, [0, 0, 0])
    assert_almost_equal(clf.dual_gap_, 0)

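
# Illustrative sketch (not upstream): for a single feature and no
# intercept, the Lasso solution has the closed form
# coef = S(x^T y / n, alpha) / (x^T x / n), where S is soft-thresholding.
# This is the value the toy tests below check numerically
# (e.g. 0.85 for alpha=0.1 on the straight-line data).
def test_lasso_single_feature_soft_thresholding_sketch():
    X = np.array([[-1.], [0.], [1.]])
    y = np.array([-1., 0., 1.])
    alpha = 0.1
    n = X.shape[0]
    rho = X[:, 0].dot(y) / n          # correlation term of the update
    scale = X[:, 0].dot(X[:, 0]) / n  # (x^T x) / n
    expected = np.sign(rho) * max(abs(rho) - alpha, 0) / scale
    clf = Lasso(alpha=alpha, fit_intercept=False).fit(X, y)
    assert_allclose(clf.coef_, [expected], rtol=1e-6)
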
def test_lasso_toy():
    # Test Lasso on a toy example for various values of alpha.
    # When validating this against glmnet, note that glmnet divides the
    # objective by n_obs.

    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]       # just a straight line
    T = [[2], [3], [4]]  # test sample

    clf = Lasso(alpha=1e-8)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = Lasso(alpha=0.1)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [.85])
    assert_array_almost_equal(pred, [1.7, 2.55, 3.4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = Lasso(alpha=0.5)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [.25])
    assert_array_almost_equal(pred, [0.5, 0.75, 1.])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = Lasso(alpha=1)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [.0])
    assert_array_almost_equal(pred, [0, 0, 0])
    assert_almost_equal(clf.dual_gap_, 0)


def test_enet_toy():
    # Test ElasticNet for various values of alpha and l1_ratio.
    # Strictly speaking, alpha = 0 should not be allowed; we test it
    # as a border case anyway.
    # ElasticNet is tested with and without a precomputed Gram matrix.

    X = np.array([[-1.], [0.], [1.]])
    Y = [-1, 0, 1]          # just a straight line
    T = [[2.], [3.], [4.]]  # test sample

    # this should be the same as lasso
    clf = ElasticNet(alpha=1e-8, l1_ratio=1.0)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=100,
                     precompute=False)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf.set_params(max_iter=100, precompute=True)
    clf.fit(X, Y)  # with Gram
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf.set_params(max_iter=100, precompute=np.dot(X.T, X))
    clf.fit(X, Y)  # with Gram
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.45454], 3)
    assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
    assert_almost_equal(clf.dual_gap_, 0)


def build_dataset(n_samples=50, n_features=200, n_informative_features=10,
                  n_targets=1):
    """Build an ill-posed linear regression problem with many noisy
    features and comparatively few samples."""
    random_state = np.random.RandomState(0)
    if n_targets > 1:
        w = random_state.randn(n_features, n_targets)
    else:
        w = random_state.randn(n_features)
    w[n_informative_features:] = 0.0
    X = random_state.randn(n_samples, n_features)
    y = np.dot(X, w)
    X_test = random_state.randn(n_samples, n_features)
    y_test = np.dot(X_test, w)
    return X, y, X_test, y_test

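
# Illustrative sketch of how the CV estimators below build their alpha
# grid when alphas is not given: n_alphas values log-spaced between
# alpha_max = max|Xc^T yc| / n_samples (Xc, yc centered when
# fit_intercept=True) and eps * alpha_max. This mirrors the private
# _alpha_grid helper at the time of writing and may change across
# versions.
def test_lasso_cv_alpha_grid_sketch():
    X, y, _, _ = build_dataset()
    n_alphas, eps = 10, 1e-3
    Xc = X - X.mean(axis=0)
    yc = y - y.mean()
    alpha_max = np.max(np.abs(Xc.T.dot(yc))) / X.shape[0]
    grid = np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max),
                       num=n_alphas)[::-1]
    clf = LassoCV(n_alphas=n_alphas, eps=eps, cv=3).fit(X, y)
    assert_allclose(clf.alphas_, grid)
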
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, cv=3).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter,
                  precompute=True, cv=3)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementations
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30, cv=3).fit(X, y)
    # for this we check that the selected alphas are at most one grid
    # point apart in clf.alphas_
    assert np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert clf.score(X_test, y_test) > 0.99


def test_lasso_cv_with_some_model_selection():
    from sklearn.model_selection import ShuffleSplit
    from sklearn import datasets

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=ShuffleSplit(random_state=0))
    )
    pipe.fit(X, y)


def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                                cv=2, n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert min(clf_unconstrained.coef_) < 0

    # On the same data, the constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert min(clf_constrained.coef_) >= 0

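
# Illustrative sketch of the alpha rescaling used in the test below:
# normalize=True divides centered columns by their l2 norm, while
# StandardScaler divides by the standard deviation (= l2 norm / sqrt(n)),
# so matching a Lasso penalty requires multiplying alpha by
# sqrt(n_samples) (and by n_samples for a pure l2 penalty such as Ridge).
def test_lasso_normalize_alpha_rescaling_sketch():
    rng = np.random.RandomState(0)
    n_samples = 40
    X = rng.randn(n_samples, 3) + 10  # non-zero mean features
    y = X.dot(rng.randn(3))
    reg_normalize = Lasso(alpha=0.1, normalize=True, tol=1e-12).fit(X, y)
    pipe = make_pipeline(
        StandardScaler(),
        Lasso(alpha=0.1 * np.sqrt(n_samples), tol=1e-12),
    ).fit(X, y)
    assert_allclose(reg_normalize.predict(X), pipe.predict(X), rtol=1e-6)
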
@pytest.mark.parametrize(
    "LinearModel, params",
    [(Lasso, {"tol": 1e-16, "alpha": 0.1}),
     (LassoLars, {"alpha": 0.1}),
     (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
     (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
     (BayesianRidge, {}),
     (ARDRegression, {}),
     (OrthogonalMatchingPursuit, {}),
     (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
     (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
     (MultiTaskLasso, {"tol": 1e-16, "alpha": 0.1}),
     (Lars, {}),
     (LinearRegression, {}),
     (LassoLarsIC, {})]
)
def test_model_pipeline_same_as_normalize_true(LinearModel, params):
    # Test that linear models (LinearModel) with normalize set to True do
    # the same as the same linear model preceded by StandardScaler in a
    # pipeline and with normalize set to False

    # normalize is True
    model_name = LinearModel.__name__
    model_normalize = LinearModel(normalize=True, fit_intercept=True,
                                  **params)

    pipeline = make_pipeline(
        StandardScaler(),
        LinearModel(normalize=False, fit_intercept=True, **params)
    )

    is_multitask = model_normalize._get_tags()["multioutput_only"]

    # prepare the data
    n_samples, n_features = 100, 2
    rng = np.random.RandomState(0)
    w = rng.randn(n_features)
    X = rng.randn(n_samples, n_features)
    X += 20  # make features non-zero mean
    y = X.dot(w)

    # make classes out of regression
    if is_classifier(model_normalize):
        y[y > np.mean(y)] = -1
        y[y > 0] = 1
    if is_multitask:
        y = np.stack((y, y), axis=1)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)

    if 'alpha' in params:
        model_normalize.set_params(alpha=params['alpha'])
        if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
            new_params = dict(
                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
        if model_name in ['Ridge', 'RidgeClassifier']:
            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
        if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
            if params['l1_ratio'] == 1:
                new_params = dict(
                    alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
            if params['l1_ratio'] == 0:
                new_params = dict(
                    alpha=params['alpha'] * X_train.shape[0])

    if 'new_params' in locals():
        pipeline[1].set_params(**new_params)

    model_normalize.fit(X_train, y_train)
    y_pred_normalize = model_normalize.predict(X_test)

    pipeline.fit(X_train, y_train)
    y_pred_standardize = pipeline.predict(X_test)

    assert_allclose(
        model_normalize.coef_ * pipeline[0].scale_, pipeline[1].coef_)
    assert pipeline[1].intercept_ == pytest.approx(y_train.mean())
    assert (model_normalize.intercept_ ==
            pytest.approx(y_train.mean() -
                          model_normalize.coef_.dot(X_train.mean(0))))
    assert_allclose(y_pred_normalize, y_pred_standardize)


@pytest.mark.parametrize(
    "LinearModel, params",
    [(Lasso, {"tol": 1e-16, "alpha": 0.1}),
     (LassoCV, {"tol": 1e-16}),
     (ElasticNetCV, {}),
     (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.01}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.01}),
     (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
     (LinearRegression, {}),
     (RidgeCV, {})]
)
def test_model_pipeline_same_dense_and_sparse(LinearModel, params):
    # Test that a linear model preceded by StandardScaler in a pipeline
    # and with normalize set to False gives the same y_pred and the same
    # .coef_ given X sparse or dense

    model_dense = make_pipeline(
        StandardScaler(with_mean=False),
        LinearModel(normalize=False, **params)
    )

    model_sparse = make_pipeline(
        StandardScaler(with_mean=False),
        LinearModel(normalize=False, **params)
    )

    # prepare the data
    rng = np.random.RandomState(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.
    X_sparse = sparse.csr_matrix(X)
    y = rng.rand(n_samples)

    if is_classifier(model_dense):
        y = np.sign(y)

    model_dense.fit(X, y)
    model_sparse.fit(X_sparse, y)

    assert_allclose(model_sparse[1].coef_, model_dense[1].coef_)
    y_pred_dense = model_dense.predict(X)
    y_pred_sparse = model_sparse.predict(X_sparse)
    assert_allclose(y_pred_dense, y_pred_sparse)
    assert_allclose(model_dense[1].intercept_, model_sparse[1].intercept_)


def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():
    # Test that lasso_path with lars_path style output gives the
    # same result

    # Some toy data
    X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T
    y = np.array([1, 2, 3.1])
    alphas = [5., 1., .5]

    # Use lars_path and lasso_path (new output) with 1D linear
    # interpolation to compute the same path
    alphas_lars, _, coef_path_lars = lars_path(X, y, method='lasso')
    coef_path_cont_lars = interpolate.interp1d(alphas_lars[::-1],
                                               coef_path_lars[:, ::-1])
    alphas_lasso2, coef_path_lasso2, _ = lasso_path(X, y, alphas=alphas,
                                                    return_models=False)
    coef_path_cont_lasso = interpolate.interp1d(alphas_lasso2[::-1],
                                                coef_path_lasso2[:, ::-1])

    assert_array_almost_equal(
        coef_path_cont_lasso(alphas), coef_path_cont_lars(alphas),
        decimal=1)

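
# Usage sketch for the path functions exercised below: enet_path returns
# the (decreasing) alpha grid, the coefficient path of shape
# (n_features, n_alphas), and the dual gap reached at each alpha.
def test_enet_path_output_shapes_sketch():
    X, y, _, _ = build_dataset(n_samples=30, n_features=8)
    alphas, coefs, dual_gaps = enet_path(X, y, n_alphas=5, l1_ratio=0.5)
    assert alphas.shape == (5,)
    assert coefs.shape == (8, 5)
    assert dual_gaps.shape == (5,)
    assert np.all(np.diff(alphas) < 0)  # alphas go from large to small
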
def test_enet_path():
    # We use a large number of samples and of informative features so that
    # the l1_ratio selected is more toward ridge than lasso
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    max_iter = 150

    # Here we have a small number of iterations, and thus the
    # ElasticNet might not converge. This is to speed up tests
    clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                       l1_ratio=[0.5, 0.7], cv=3, max_iter=max_iter)
    ignore_warnings(clf.fit)(X, y)
    # Well-conditioned settings, we should have selected our
    # smallest penalty
    assert_almost_equal(clf.alpha_, min(clf.alphas_))
    # Non-sparse ground truth: we should have selected an elastic-net
    # that is closer to ridge than to lasso
    assert clf.l1_ratio_ == min(clf.l1_ratio)

    clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                       l1_ratio=[0.5, 0.7], cv=3, max_iter=max_iter,
                       precompute=True)
    ignore_warnings(clf.fit)(X, y)

    # Well-conditioned settings, we should have selected our
    # smallest penalty
    assert_almost_equal(clf.alpha_, min(clf.alphas_))
    # Non-sparse ground truth: we should have selected an elastic-net
    # that is closer to ridge than to lasso
    assert clf.l1_ratio_ == min(clf.l1_ratio)

    # We are in well-conditioned settings with low noise: we should
    # have a good test-set performance
    assert clf.score(X_test, y_test) > 0.99

    # Multi-output/target case
    X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3)
    clf = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
                                cv=3, max_iter=max_iter)
    ignore_warnings(clf.fit)(X, y)
    # We are in well-conditioned settings with low noise: we should
    # have a good test-set performance
    assert clf.score(X_test, y_test) > 0.99
    assert clf.coef_.shape == (3, 10)

    # Mono-output should have same cross-validated alpha_ and l1_ratio_
    # in both cases.
    X, y, _, _ = build_dataset(n_features=10)
    clf1 = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf1.fit(X, y)
    clf2 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf2.fit(X, y[:, np.newaxis])
    assert_almost_equal(clf1.l1_ratio_, clf2.l1_ratio_)
    assert_almost_equal(clf1.alpha_, clf2.alpha_)


def test_path_parameters():
    X, y, _, _ = build_dataset()
    max_iter = 100

    clf = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=max_iter,
                       l1_ratio=0.5, tol=1e-3)
    clf.fit(X, y)  # new params
    assert_almost_equal(0.5, clf.l1_ratio)
    assert 50 == clf.n_alphas
    assert 50 == len(clf.alphas_)


def test_warm_start():
    X, y, _, _ = build_dataset()
    clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(clf.fit)(X, y)
    ignore_warnings(clf.fit)(X, y)  # do a second round with 5 iterations

    clf2 = ElasticNet(alpha=0.1, max_iter=10)
    ignore_warnings(clf2.fit)(X, y)
    assert_array_almost_equal(clf2.coef_, clf.coef_)


def test_lasso_alpha_warning():
    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]  # just a straight line

    clf = Lasso(alpha=0)
    assert_warns(UserWarning, clf.fit, X, Y)

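
# Sketch of what the warning above is about: with alpha=0 the objective
# reduces to ordinary least squares, for which coordinate descent is not
# the recommended solver; on this exactly linear toy data the (warned)
# fit still matches LinearRegression.
def test_lasso_alpha_zero_matches_ols_sketch():
    X = np.array([[-1.], [0.], [1.]])
    y = np.array([-1., 0., 1.])
    with pytest.warns(UserWarning):
        lasso = Lasso(alpha=0).fit(X, y)
    ols = LinearRegression().fit(X, y)
    assert_allclose(lasso.coef_, ols.coef_, rtol=1e-6)
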
def test_lasso_positive_constraint():
    X = [[-1], [0], [1]]
    y = [1, 0, -1]  # just a straight line with negative slope

    lasso = Lasso(alpha=0.1, max_iter=1000, positive=True)
    lasso.fit(X, y)
    assert min(lasso.coef_) >= 0

    lasso = Lasso(alpha=0.1, max_iter=1000, precompute=True, positive=True)
    lasso.fit(X, y)
    assert min(lasso.coef_) >= 0


def test_enet_positive_constraint():
    X = [[-1], [0], [1]]
    y = [1, 0, -1]  # just a straight line with negative slope

    enet = ElasticNet(alpha=0.1, max_iter=1000, positive=True)
    enet.fit(X, y)
    assert min(enet.coef_) >= 0


def test_enet_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    enetcv_unconstrained = ElasticNetCV(n_alphas=3, eps=1e-1,
                                        max_iter=max_iter, cv=2, n_jobs=1)
    enetcv_unconstrained.fit(X, y)
    assert min(enetcv_unconstrained.coef_) < 0

    # On the same data, the constrained fit has non-negative coefficients
    enetcv_constrained = ElasticNetCV(n_alphas=3, eps=1e-1,
                                      max_iter=max_iter, cv=2,
                                      positive=True, n_jobs=1)
    enetcv_constrained.fit(X, y)
    assert min(enetcv_constrained.coef_) >= 0


def test_uniform_targets():
    enet = ElasticNetCV(n_alphas=3)
    m_enet = MultiTaskElasticNetCV(n_alphas=3)
    lasso = LassoCV(n_alphas=3)
    m_lasso = MultiTaskLassoCV(n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_,
                               [np.finfo(float).resolution] * 3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_,
                               [np.finfo(float).resolution] * 3)


def test_multi_task_lasso_and_enet():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    # Y_test = np.c_[y_test, y_test]
    clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
    assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit,
                         X, Y)


def test_lasso_readonly_data():
    X = np.array([[-1], [0], [1]])
    Y = np.array([-1, 0, 1])       # just a straight line
    T = np.array([[2], [3], [4]])  # test sample
    with TempMemmap((X, Y)) as (X, Y):
        clf = Lasso(alpha=0.5)
        clf.fit(X, Y)
        pred = clf.predict(T)
        assert_array_almost_equal(clf.coef_, [.25])
        assert_array_almost_equal(pred, [0.5, 0.75, 1.])
        assert_almost_equal(clf.dual_gap_, 0)


def test_multi_task_lasso_readonly_data():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    with TempMemmap((X, Y)) as (X, Y):
        Y = np.c_[y, y]
        clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
        assert 0 < clf.dual_gap_ < 1e-5
        assert_array_almost_equal(clf.coef_[0], clf.coef_[1])


def test_enet_multitarget():
    n_targets = 3
    X, y, _, _ = build_dataset(n_samples=10, n_features=8,
                               n_informative_features=10,
                               n_targets=n_targets)
    estimator = ElasticNet(alpha=0.01)
    estimator.fit(X, y)
    coef, intercept, dual_gap = (estimator.coef_, estimator.intercept_,
                                 estimator.dual_gap_)

    for k in range(n_targets):
        estimator.fit(X, y[:, k])
        assert_array_almost_equal(coef[k, :], estimator.coef_)
        assert_array_almost_equal(intercept[k], estimator.intercept_)
        assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)


def test_multioutput_enetcv_error():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    y = rng.randn(10, 2)
    clf = ElasticNetCV()
    assert_raises(ValueError, clf.fit, X, y)

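
# Illustrative sketch of the structure the multi-task penalty enforces in
# the tests above: the l2/l1 row penalty zeroes whole rows of coef_ via
# group soft-thresholding, so (barring exact numeric coincidences) each
# feature is active either for all tasks or for none.
def test_multitask_lasso_shared_support_sketch():
    X, y, _, _ = build_dataset(n_samples=30, n_features=10, n_targets=3)
    clf = MultiTaskLasso(alpha=1.0).fit(X, y)
    active = clf.coef_ != 0  # shape (n_tasks, n_features)
    assert_array_equal(active.all(axis=0), active.any(axis=0))
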
def test_multitask_enet_and_lasso_cv():
    X, y, _, _ = build_dataset(n_features=50, n_targets=3)
    clf = MultiTaskElasticNetCV(cv=3).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.00556, 3)
    clf = MultiTaskLassoCV(cv=3).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.00278, 3)

    X, y, _, _ = build_dataset(n_targets=3)
    clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100,
                                l1_ratio=[0.3, 0.5], tol=1e-3, cv=3)
    clf.fit(X, y)
    assert 0.5 == clf.l1_ratio_
    assert (3, X.shape[1]) == clf.coef_.shape
    assert (3, ) == clf.intercept_.shape
    assert (2, 10, 3) == clf.mse_path_.shape
    assert (2, 10) == clf.alphas_.shape

    X, y, _, _ = build_dataset(n_targets=3)
    clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3,
                           cv=3)
    clf.fit(X, y)
    assert (3, X.shape[1]) == clf.coef_.shape
    assert (3, ) == clf.intercept_.shape
    assert (10, 3) == clf.mse_path_.shape
    assert 10 == len(clf.alphas_)


def test_1d_multioutput_enet_and_multitask_enet_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf1.fit(X, y)
    assert_almost_equal(clf.l1_ratio_, clf1.l1_ratio_)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])


def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])


def test_sparse_input_dtype_enet_and_lassocv():
    X, y, _, _ = build_dataset(n_features=10)
    clf = ElasticNetCV(n_alphas=5)
    clf.fit(sparse.csr_matrix(X), y)
    clf1 = ElasticNetCV(n_alphas=5)
    clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
    assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
    assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)

    clf = LassoCV(n_alphas=5)
    clf.fit(sparse.csr_matrix(X), y)
    clf1 = LassoCV(n_alphas=5)
    clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
    assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
    assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)


def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError,
                            ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet and Lasso
    assert_raises_regex(ValueError,
                        ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit,
                        X, y)
    assert_raises_regex(ValueError,
                        ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", Lasso(precompute='auto').fit, X, y)

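
# Usage sketch for a valid precompute argument: an array-like precompute
# must be the Gram matrix X.T @ X; with fit_intercept=False (so X is not
# re-centered) it should reproduce the coefficients of a plain fit.
def test_precompute_gram_matches_plain_fit_sketch():
    X, y, _, _ = build_dataset(n_samples=30, n_features=5)
    gram = X.T.dot(X)
    reg_plain = ElasticNet(alpha=0.05, precompute=False,
                           fit_intercept=False, tol=1e-10).fit(X, y)
    reg_gram = ElasticNet(alpha=0.05, precompute=gram,
                          fit_intercept=False, tol=1e-10).fit(X, y)
    assert_allclose(reg_plain.coef_, reg_gram.coef_, rtol=1e-5)
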
def test_warm_start_convergence():
    X, y, _, _ = build_dataset()
    model = ElasticNet(alpha=1e-3, tol=1e-3).fit(X, y)
    n_iter_reference = model.n_iter_

    # This dataset is not trivial enough for the model to converge in one
    # pass.
    assert n_iter_reference > 2

    # Check that n_iter_ is invariant to multiple calls to fit
    # when warm_start=False, all else being equal.
    model.fit(X, y)
    n_iter_cold_start = model.n_iter_
    assert n_iter_cold_start == n_iter_reference

    # Fit the same model again, using a warm start: the optimizer just
    # performs a single pass before checking that it has already converged
    model.set_params(warm_start=True)
    model.fit(X, y)
    n_iter_warm_start = model.n_iter_
    assert n_iter_warm_start == 1


def test_warm_start_convergence_with_regularizer_decrement():
    X, y = load_diabetes(return_X_y=True)

    # Train a model to converge on a lightly regularized problem
    final_alpha = 1e-5
    low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y)

    # Fit a new model on a more regularized version of the same problem.
    # Fitting with high regularization is easier, so it should converge
    # faster in general.
    high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
    assert low_reg_model.n_iter_ > high_reg_model.n_iter_

    # Fit the solution to the original, less regularized version of the
    # problem but from the solution of the highly regularized variant of
    # the problem as a better starting point. This should also converge
    # faster than the original model that starts from zero.
    warm_low_reg_model = deepcopy(high_reg_model)
    warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha)
    warm_low_reg_model.fit(X, y)
    assert low_reg_model.n_iter_ > warm_low_reg_model.n_iter_


def test_random_descent():
    # Test that both random and cyclic selection give the same results.
    # Ensure that the test models fully converge and check a wide
    # range of conditions.

    # This uses the coordinate descent algorithm with the Gram trick.
    X, y, _, _ = build_dataset(n_samples=50, n_features=20)

    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X, y)
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(X, y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # This uses the descent algorithm without the Gram trick
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X.T, y[:20])
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(X.T, y[:20])
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Sparse case
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(sparse.csr_matrix(X), y)
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(sparse.csr_matrix(X), y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Multioutput case.
    new_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
    clf_cyclic = MultiTaskElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X, new_y)
    clf_random = MultiTaskElasticNet(selection='random', tol=1e-8,
                                     random_state=42)
    clf_random.fit(X, new_y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Raise an error when selection is neither 'cyclic' nor 'random'.
    clf_random = ElasticNet(selection='invalid')
    assert_raises(ValueError, clf_random.fit, X, y)

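
# Sketch: selection='random' draws coordinate indices from random_state,
# so refitting with the same seed is fully reproducible.
def test_random_selection_reproducible_sketch():
    X, y, _, _ = build_dataset(n_samples=30, n_features=10)
    reg1 = ElasticNet(selection='random', random_state=0).fit(X, y)
    reg2 = ElasticNet(selection='random', random_state=0).fit(X, y)
    assert_array_equal(reg1.coef_, reg2.coef_)
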
def test_enet_path_positive():
    # Test the positive parameter
    X, Y, _, _ = build_dataset(n_samples=50, n_features=50, n_targets=2)

    # For mono output
    # Test that the coefs returned by positive=True in enet_path are
    # positive
    for path in [enet_path, lasso_path]:
        pos_path_coef = path(X, Y[:, 0], positive=True)[1]
        assert np.all(pos_path_coef >= 0)

    # For multi output, the positive parameter is not allowed
    # Test that an error is raised
    for path in [enet_path, lasso_path]:
        assert_raises(ValueError, path, X, Y, positive=True)


def test_sparse_dense_descent_paths():
    # Test that dense and sparse input give the same output for
    # descent paths.
    X, y, _, _ = build_dataset(n_samples=50, n_features=20)
    csr = sparse.csr_matrix(X)
    for path in [enet_path, lasso_path]:
        _, coefs, _ = path(X, y, fit_intercept=False)
        _, sparse_coefs, _ = path(csr, y, fit_intercept=False)
        assert_array_almost_equal(coefs, sparse_coefs)


def test_check_input_false():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    y = check_array(y, order='F', dtype='float64', ensure_2d=False)
    clf = ElasticNet(selection='cyclic', tol=1e-8)

    # Check that no error is raised if data is provided in the right
    # format
    clf.fit(X, y, check_input=False)

    # With check_input=False, an exhaustive check is not made on y but its
    # dtype is still cast in _preprocess_data to X's dtype. So the test
    # should pass anyway
    X = check_array(X, order='F', dtype='float32')
    clf.fit(X, y, check_input=False)

    # With no input checking, providing X in C order should result in
    # incorrect computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, clf.fit, X, y, check_input=False)


@pytest.mark.parametrize("check_input", [True, False])
def test_enet_copy_X_True(check_input):
    X, y, _, _ = build_dataset()
    X = X.copy(order='F')

    original_X = X.copy()
    enet = ElasticNet(copy_X=True)
    enet.fit(X, y, check_input=check_input)

    assert_array_equal(original_X, X)


def test_enet_copy_X_False_check_input_False():
    X, y, _, _ = build_dataset()
    X = X.copy(order='F')

    original_X = X.copy()
    enet = ElasticNet(copy_X=False)
    enet.fit(X, y, check_input=False)

    # No copying, X is overwritten
    assert np.any(np.not_equal(original_X, X))


def test_overridden_gram_matrix():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    Gram = X.T.dot(X)
    clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram)
    assert_warns_message(UserWarning,
                         "Gram matrix was provided but X was centered"
                         " to fit intercept, "
                         "or X was normalized : recomputing Gram matrix.",
                         clf.fit, X, y)


@pytest.mark.parametrize('model', [ElasticNet, Lasso])
def test_lasso_non_float_y(model):
    X = [[0, 0], [1, 1], [-1, -1]]
    y = [0, 1, 2]
    y_float = [0.0, 1.0, 2.0]

    clf = model(fit_intercept=False)
    clf.fit(X, y)
    clf_float = model(fit_intercept=False)
    clf_float.fit(X, y_float)
    assert_array_equal(clf.coef_, clf_float.coef_)

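
# Sketch related to the precision test below: the coordinate descent
# solver has a float32 code path, so coef_ keeps the input dtype rather
# than silently upcasting to float64.
def test_enet_coef_dtype_matches_input_sketch():
    X, y, _, _ = build_dataset(n_samples=20, n_features=5)
    reg32 = ElasticNet(alpha=0.1).fit(X.astype(np.float32),
                                      y.astype(np.float32))
    assert reg32.coef_.dtype == np.float32
    reg64 = ElasticNet(alpha=0.1).fit(X, y)
    assert reg64.coef_.dtype == np.float64
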
def test_enet_float_precision():
    # Generate dataset
    X, y, X_test, y_test = build_dataset(n_samples=20, n_features=10)
    # Here we have a small number of iterations, and thus the
    # ElasticNet might not converge. This is to speed up tests

    for normalize in [True, False]:
        for fit_intercept in [True, False]:
            coef = {}
            intercept = {}
            for dtype in [np.float64, np.float32]:
                clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False,
                                 fit_intercept=fit_intercept,
                                 normalize=normalize)

                X = dtype(X)
                y = dtype(y)
                ignore_warnings(clf.fit)(X, y)

                coef[('simple', dtype)] = clf.coef_
                intercept[('simple', dtype)] = clf.intercept_

                assert clf.coef_.dtype == dtype

                # test precompute Gram array
                Gram = X.T.dot(X)
                clf_precompute = ElasticNet(alpha=0.5, max_iter=100,
                                            precompute=Gram,
                                            fit_intercept=fit_intercept,
                                            normalize=normalize)
                ignore_warnings(clf_precompute.fit)(X, y)
                assert_array_almost_equal(clf.coef_, clf_precompute.coef_)
                assert_array_almost_equal(clf.intercept_,
                                          clf_precompute.intercept_)

                # test multi task enet
                multi_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
                clf_multioutput = MultiTaskElasticNet(
                    alpha=0.5, max_iter=100, fit_intercept=fit_intercept,
                    normalize=normalize)
                clf_multioutput.fit(X, multi_y)
                coef[('multi', dtype)] = clf_multioutput.coef_
                intercept[('multi', dtype)] = clf_multioutput.intercept_
                assert clf.coef_.dtype == dtype

            for v in ['simple', 'multi']:
                assert_array_almost_equal(coef[(v, np.float32)],
                                          coef[(v, np.float64)],
                                          decimal=4)
                assert_array_almost_equal(intercept[(v, np.float32)],
                                          intercept[(v, np.float64)],
                                          decimal=4)


def test_enet_l1_ratio():
    # Test that an error message is raised if an estimator that
    # uses _alpha_grid is called with l1_ratio=0
    msg = ("Automatic alpha grid generation is not supported for l1_ratio="
           "0. Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])

    assert_raise_message(ValueError, msg,
                         ElasticNetCV(l1_ratio=0, random_state=42).fit,
                         X, y)
    assert_raise_message(ValueError, msg,
                         MultiTaskElasticNetCV(l1_ratio=0,
                                               random_state=42).fit,
                         X, y[:, None])

    # Test that l1_ratio=0 is allowed if we supply a grid manually
    alphas = [0.1, 10]
    estkwds = {'alphas': alphas, 'random_state': 42}
    est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = ElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est_desired.fit(X, y)
        est.fit(X, y)
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)

    est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est.fit(X, y[:, None])
        est_desired.fit(X, y[:, None])
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)

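
# Why the automatic grid cannot work for l1_ratio=0 (checked above): the
# grid is anchored at alpha_max = max|X^T y| / (n_samples * l1_ratio),
# which would divide by zero. A hedged numeric check at l1_ratio=0.5,
# with fit_intercept=False so that no centering is involved: at alpha_max
# the fitted coefficients are exactly zero.
def test_enet_alpha_max_zeroes_coef_sketch():
    X, y, _, _ = build_dataset(n_samples=30, n_features=10)
    l1_ratio = 0.5
    alpha_max = np.max(np.abs(X.T.dot(y))) / (X.shape[0] * l1_ratio)
    reg = ElasticNet(alpha=alpha_max, l1_ratio=l1_ratio,
                     fit_intercept=False).fit(X, y)
    assert_allclose(reg.coef_, np.zeros(X.shape[1]), atol=1e-12)
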
def test_coef_shape_not_zero():
    est_no_intercept = Lasso(fit_intercept=False)
    est_no_intercept.fit(np.c_[np.ones(3)], np.ones(3))
    assert est_no_intercept.coef_.shape == (1,)


def test_warm_start_multitask_lasso():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    clf = MultiTaskLasso(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(clf.fit)(X, Y)
    ignore_warnings(clf.fit)(X, Y)  # do a second round with 5 iterations

    clf2 = MultiTaskLasso(alpha=0.1, max_iter=10)
    ignore_warnings(clf2.fit)(X, Y)
    assert_array_almost_equal(clf2.coef_, clf.coef_)


@pytest.mark.parametrize('klass, n_classes, kwargs',
                         [(Lasso, 1, dict(precompute=True)),
                          (Lasso, 1, dict(precompute=False)),
                          (MultiTaskLasso, 2, dict())])
def test_enet_coordinate_descent(klass, n_classes, kwargs):
    """Test that a warning is issued if the model does not converge."""
    clf = klass(max_iter=2, **kwargs)
    n_samples = 5
    n_features = 2
    X = np.ones((n_samples, n_features)) * 1e50
    y = np.ones((n_samples, n_classes))
    if klass == Lasso:
        y = y.ravel()
    assert_warns(ConvergenceWarning, clf.fit, X, y)


def test_convergence_warnings():
    random_state = np.random.RandomState(0)
    X = random_state.standard_normal((1000, 500))
    y = random_state.standard_normal((1000, 3))

    # check that the model fails to converge: with tol=-1 the dual gap,
    # which is non-negative, can never fall below the tolerance
    with pytest.warns(ConvergenceWarning):
        MultiTaskElasticNet(max_iter=1, tol=-1).fit(X, y)

    # check that the model converges w/o warnings
    with pytest.warns(None) as record:
        MultiTaskElasticNet(max_iter=1000).fit(X, y)

    assert not record.list


def test_sparse_input_convergence_warning():
    X, y, _, _ = build_dataset(n_samples=1000, n_features=500)

    with pytest.warns(ConvergenceWarning):
        ElasticNet(max_iter=1, tol=0).fit(
            sparse.csr_matrix(X, dtype=np.float32), y)

    # check that the model converges w/o warnings
    with pytest.warns(None) as record:
        Lasso(max_iter=1000).fit(sparse.csr_matrix(X, dtype=np.float32), y)

    assert not record.list


@pytest.mark.parametrize("precompute, inner_precompute", [
    (True, True),
    ('auto', False),
    (False, False),
])
def test_lassoCV_does_not_set_precompute(monkeypatch, precompute,
                                         inner_precompute):
    X, y, _, _ = build_dataset()
    calls = 0

    class LassoMock(Lasso):
        def fit(self, X, y):
            super().fit(X, y)
            nonlocal calls
            calls += 1
            assert self.precompute == inner_precompute

    monkeypatch.setattr("sklearn.linear_model._coordinate_descent.Lasso",
                        LassoMock)
    clf = LassoCV(precompute=precompute)
    clf.fit(X, y)
    assert calls > 0


def test_multi_task_lasso_cv_dtype():
    n_samples, n_features = 10, 3
    rng = np.random.RandomState(42)
    X = rng.binomial(1, .5, size=(n_samples, n_features))
    X = X.astype(int)  # make it explicit that X is int
    y = X[:, [0, 0]].copy()
    est = MultiTaskLassoCV(n_alphas=5, fit_intercept=True).fit(X, y)
    assert_array_almost_equal(est.coef_, [[1, 0, 0]] * 2, decimal=3)

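
# One-line identity behind the consistency checks below: the weighted
# least-squares term satisfies
#     sum_i w_i * (y_i - x_i^T beta)^2 == ||sqrt(w) * (y - X beta)||^2,
# which is why rescaling all weights, or repeating a sample instead of
# doubling its weight, leaves the minimizer unchanged (up to the
# penalty's sample-size convention).
def test_weighted_residual_identity_sketch():
    rng = np.random.RandomState(0)
    X = rng.rand(8, 3)
    y = rng.rand(8)
    beta = rng.rand(3)
    w = rng.rand(8)
    resid = y - X.dot(beta)
    assert_allclose(np.sum(w * resid ** 2),
                    np.linalg.norm(np.sqrt(w) * resid) ** 2)
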
@pytest.mark.parametrize('fit_intercept', [True, False])
@pytest.mark.parametrize('alpha', [0.01])
@pytest.mark.parametrize('normalize', [False, True])
@pytest.mark.parametrize('precompute', [False, True])
def test_enet_sample_weight_consistency(fit_intercept, alpha, normalize,
                                        precompute):
    """Test that the impact of sample_weight is consistent."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 5

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    params = dict(alpha=alpha, fit_intercept=fit_intercept,
                  precompute=precompute, tol=1e-6, l1_ratio=0.5)

    reg = ElasticNet(**params).fit(X, y)
    coef = reg.coef_.copy()
    if fit_intercept:
        intercept = reg.intercept_

    # sample_weight=np.ones(..) should be equivalent to sample_weight=None
    sample_weight = np.ones_like(y)
    reg.fit(X, y, sample_weight=sample_weight)
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # sample_weight=None should be equivalent to sample_weight = number
    sample_weight = 123.
    reg.fit(X, y, sample_weight=sample_weight)
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # scaling of sample_weight should have no effect, cf. np.average()
    sample_weight = 2 * np.ones_like(y)
    reg.fit(X, y, sample_weight=sample_weight)
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # setting one element of sample_weight to 0 is equivalent to removing
    # the corresponding sample
    sample_weight = np.ones_like(y)
    sample_weight[-1] = 0
    reg.fit(X, y, sample_weight=sample_weight)
    coef1 = reg.coef_.copy()
    if fit_intercept:
        intercept1 = reg.intercept_
    reg.fit(X[:-1], y[:-1])
    assert_allclose(reg.coef_, coef1, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept1)

    # check that multiplying sample_weight by 2 is equivalent
    # to repeating the corresponding samples twice
    if sparse.issparse(X):
        X = X.toarray()

    X2 = np.concatenate([X, X[:n_samples // 2]], axis=0)
    y2 = np.concatenate([y, y[:n_samples // 2]])
    sample_weight_1 = np.ones(len(y))
    sample_weight_1[:n_samples // 2] = 2

    reg1 = ElasticNet(**params).fit(
        X, y, sample_weight=sample_weight_1
    )

    reg2 = ElasticNet(**params).fit(
        X2, y2, sample_weight=None
    )
    assert_allclose(reg1.coef_, reg2.coef_)


def test_enet_sample_weight_sparse():
    reg = ElasticNet()
    X = sparse.csc_matrix(np.zeros((3, 2)))
    y = np.array([-1, 0, 1])
    sw = np.array([1, 2, 3])
    with pytest.raises(ValueError, match="Sample weights do not.*support "
                                         "sparse matrices"):
        reg.fit(X, y, sample_weight=sw, check_input=True)


@pytest.mark.parametrize("backend", ["loky", "threading"])
@pytest.mark.parametrize("estimator", [ElasticNetCV, MultiTaskElasticNetCV,
                                       LassoCV, MultiTaskLassoCV])
def test_linear_models_cv_fit_for_all_backends(backend, estimator):
    # LinearModelsCV.fit performs inplace operations on input data which
    # is memmapped when using the loky backend, causing an error due to
    # unexpected behavior of fancy indexing of read-only memmaps
    # (cf. numpy#14132).

    if (parse_version(joblib.__version__) < parse_version('0.12')
            and backend == 'loky'):
        pytest.skip('loky backend does not exist in joblib <0.12')

    # Create a problem sufficiently large to cause memmapping (1MB).
    n_targets = 1 + (estimator in (MultiTaskElasticNetCV, MultiTaskLassoCV))
    X, y = make_regression(20000, 10, n_targets=n_targets)

    with joblib.parallel_backend(backend=backend):
        estimator(n_jobs=2, cv=3).fit(X, y)
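

# Usage sketch: the joblib backend can be selected by users the same way
# the test above does; a smoke check with the threading backend.
def test_lasso_cv_threading_backend_smoke_sketch():
    X, y, _, _ = build_dataset(n_samples=50, n_features=20)
    with joblib.parallel_backend("threading"):
        clf = LassoCV(n_alphas=5, cv=3, n_jobs=2).fit(X, y)
    assert clf.coef_.shape == (20,)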