# Authors: Olivier Grisel <olivier.grisel@ensta.org>
# Alexandre Gramfort <alexandre.gramfort@inria.fr>
# License: BSD 3 clause
import numpy as np
import pytest
from scipy import interpolate, sparse
from copy import deepcopy
import joblib
from sklearn.base import is_classifier
from sklearn.datasets import load_diabetes
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import assert_raises_regex
from sklearn.utils._testing import assert_raise_message
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import assert_warns_message
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import TempMemmap
from sklearn.utils.fixes import parse_version
from sklearn.linear_model import (
ARDRegression,
BayesianRidge,
ElasticNet,
ElasticNetCV,
enet_path,
Lars,
lars_path,
Lasso,
LassoCV,
LassoLars,
LassoLarsCV,
LassoLarsIC,
lasso_path,
LinearRegression,
MultiTaskElasticNet,
MultiTaskElasticNetCV,
MultiTaskLasso,
MultiTaskLassoCV,
OrthogonalMatchingPursuit,
Ridge,
RidgeClassifier,
RidgeCV,
)
from sklearn.linear_model._coordinate_descent import _set_order
from sklearn.utils import check_array
@pytest.mark.parametrize('l1_ratio', (-1, 2, None, 10, 'something_wrong'))
def test_l1_ratio_param_invalid(l1_ratio):
    # Check that the correct error is raised when the l1_ratio of ElasticNet
    # is outside the valid range
X = np.array([[-1.], [0.], [1.]])
Y = [-1, 0, 1] # just a straight line
msg = "l1_ratio must be between 0 and 1; got l1_ratio="
clf = ElasticNet(alpha=0.1, l1_ratio=l1_ratio)
with pytest.raises(ValueError, match=msg):
clf.fit(X, Y)
@pytest.mark.parametrize('order', ['C', 'F'])
@pytest.mark.parametrize('input_order', ['C', 'F'])
def test_set_order_dense(order, input_order):
"""Check that _set_order returns arrays with promised order."""
X = np.array([[0], [0], [0]], order=input_order)
y = np.array([0, 0, 0], order=input_order)
X2, y2 = _set_order(X, y, order=order)
if order == 'C':
assert X2.flags['C_CONTIGUOUS']
assert y2.flags['C_CONTIGUOUS']
elif order == 'F':
assert X2.flags['F_CONTIGUOUS']
assert y2.flags['F_CONTIGUOUS']
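    # When the requested order already matches the input order, _set_order
    # should return the original arrays untouched (no copy is made).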
if order == input_order:
assert X is X2
assert y is y2
@pytest.mark.parametrize('order', ['C', 'F'])
@pytest.mark.parametrize('input_order', ['C', 'F'])
def test_set_order_sparse(order, input_order):
"""Check that _set_order returns sparse matrices in promised format."""
X = sparse.coo_matrix(np.array([[0], [0], [0]]))
y = sparse.coo_matrix(np.array([0, 0, 0]))
sparse_format = "csc" if input_order == "F" else "csr"
X = X.asformat(sparse_format)
    y = y.asformat(sparse_format)
X2, y2 = _set_order(X, y, order=order)
if order == 'C':
assert sparse.isspmatrix_csr(X2)
assert sparse.isspmatrix_csr(y2)
elif order == 'F':
assert sparse.isspmatrix_csc(X2)
assert sparse.isspmatrix_csc(y2)
def test_lasso_zero():
# Check that the lasso can handle zero data without crashing
X = [[0], [0], [0]]
y = [0, 0, 0]
clf = Lasso(alpha=0.1).fit(X, y)
pred = clf.predict([[1], [2], [3]])
assert_array_almost_equal(clf.coef_, [0])
assert_array_almost_equal(pred, [0, 0, 0])
assert_almost_equal(clf.dual_gap_, 0)
def test_lasso_toy():
# Test Lasso on a toy example for various values of alpha.
    # When validating this against glmnet, note that glmnet scales the
    # objective by the number of observations (nobs).
X = [[-1], [0], [1]]
Y = [-1, 0, 1] # just a straight line
T = [[2], [3], [4]] # test sample
clf = Lasso(alpha=1e-8)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [1])
assert_array_almost_equal(pred, [2, 3, 4])
assert_almost_equal(clf.dual_gap_, 0)
clf = Lasso(alpha=0.1)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [.85])
assert_array_almost_equal(pred, [1.7, 2.55, 3.4])
assert_almost_equal(clf.dual_gap_, 0)
clf = Lasso(alpha=0.5)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [.25])
assert_array_almost_equal(pred, [0.5, 0.75, 1.])
assert_almost_equal(clf.dual_gap_, 0)
clf = Lasso(alpha=1)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [.0])
assert_array_almost_equal(pred, [0, 0, 0])
assert_almost_equal(clf.dual_gap_, 0)
def test_enet_toy():
    # Test ElasticNet for various values of alpha and l1_ratio.
    # Strictly speaking, alpha = 0 should not be allowed; however, we test
    # it here as a border case.
    # ElasticNet is tested with and without a precomputed Gram matrix.
X = np.array([[-1.], [0.], [1.]])
Y = [-1, 0, 1] # just a straight line
T = [[2.], [3.], [4.]] # test sample
# this should be the same as lasso
clf = ElasticNet(alpha=1e-8, l1_ratio=1.0)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [1])
assert_array_almost_equal(pred, [2, 3, 4])
assert_almost_equal(clf.dual_gap_, 0)
clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=100,
precompute=False)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
assert_almost_equal(clf.dual_gap_, 0)
clf.set_params(max_iter=100, precompute=True)
clf.fit(X, Y) # with Gram
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
assert_almost_equal(clf.dual_gap_, 0)
clf.set_params(max_iter=100, precompute=np.dot(X.T, X))
clf.fit(X, Y) # with Gram
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
assert_almost_equal(clf.dual_gap_, 0)
clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [0.45454], 3)
assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
assert_almost_equal(clf.dual_gap_, 0)
def build_dataset(n_samples=50, n_features=200, n_informative_features=10,
n_targets=1):
"""
build an ill-posed linear regression problem with many noisy features and
comparatively few samples
"""
random_state = np.random.RandomState(0)
if n_targets > 1:
w = random_state.randn(n_features, n_targets)
else:
w = random_state.randn(n_features)
w[n_informative_features:] = 0.0
X = random_state.randn(n_samples, n_features)
y = np.dot(X, w)
X_test = random_state.randn(n_samples, n_features)
y_test = np.dot(X_test, w)
return X, y, X_test, y_test
def test_lasso_cv():
X, y, X_test, y_test = build_dataset()
max_iter = 150
clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, cv=3).fit(X, y)
assert_almost_equal(clf.alpha_, 0.056, 2)
clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True,
cv=3)
clf.fit(X, y)
assert_almost_equal(clf.alpha_, 0.056, 2)
# Check that the lars and the coordinate descent implementation
# select a similar alpha
lars = LassoLarsCV(normalize=False, max_iter=30, cv=3).fit(X, y)
    # for this we check that the alphas they select are at most one position
    # apart in the (decreasing) grid of clf.alphas_
assert np.abs(np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1
# check that they also give a similar MSE
mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
clf.mse_path_[5].mean(), significant=2)
# test set
assert clf.score(X_test, y_test) > 0.99
def test_lasso_cv_with_some_model_selection():
from sklearn.model_selection import ShuffleSplit
from sklearn import datasets
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
pipe = make_pipeline(
StandardScaler(),
LassoCV(cv=ShuffleSplit(random_state=0))
)
pipe.fit(X, y)
def test_lasso_cv_positive_constraint():
X, y, X_test, y_test = build_dataset()
max_iter = 500
# Ensure the unconstrained fit has a negative coefficient
clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
n_jobs=1)
clf_unconstrained.fit(X, y)
assert min(clf_unconstrained.coef_) < 0
# On same data, constrained fit has non-negative coefficients
clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
positive=True, cv=2, n_jobs=1)
clf_constrained.fit(X, y)
assert min(clf_constrained.coef_) >= 0
@pytest.mark.parametrize(
"LinearModel, params",
[(Lasso, {"tol": 1e-16, "alpha": 0.1}),
(LassoLars, {"alpha": 0.1}),
(RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
(ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
(ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
(Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
(BayesianRidge, {}),
(ARDRegression, {}),
(OrthogonalMatchingPursuit, {}),
(MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
(MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
(MultiTaskLasso, {"tol": 1e-16, "alpha": 0.1}),
(Lars, {}),
(LinearRegression, {}),
(LassoLarsIC, {})]
)
def test_model_pipeline_same_as_normalize_true(LinearModel, params):
    # Test that linear models (LinearModel) with normalize set to True
    # behave the same as the same linear model preceded by StandardScaler
    # in a pipeline and with normalize set to False
# normalize is True
model_name = LinearModel.__name__
model_normalize = LinearModel(normalize=True, fit_intercept=True, **params)
pipeline = make_pipeline(
StandardScaler(),
LinearModel(normalize=False, fit_intercept=True, **params)
)
is_multitask = model_normalize._get_tags()["multioutput_only"]
# prepare the data
n_samples, n_features = 100, 2
rng = np.random.RandomState(0)
w = rng.randn(n_features)
X = rng.randn(n_samples, n_features)
X += 20 # make features non-zero mean
y = X.dot(w)
# make classes out of regression
if is_classifier(model_normalize):
y[y > np.mean(y)] = -1
y[y > 0] = 1
if is_multitask:
y = np.stack((y, y), axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
if 'alpha' in params:
model_normalize.set_params(alpha=params['alpha'])
if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
new_params = dict(
alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
if model_name in ['Ridge', 'RidgeClassifier']:
new_params = dict(alpha=params['alpha'] * X_train.shape[0])
if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
if params['l1_ratio'] == 1:
new_params = dict(
alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
if params['l1_ratio'] == 0:
new_params = dict(alpha=params['alpha'] * X_train.shape[0])
if 'new_params' in locals():
pipeline[1].set_params(**new_params)
model_normalize.fit(X_train, y_train)
y_pred_normalize = model_normalize.predict(X_test)
pipeline.fit(X_train, y_train)
y_pred_standardize = pipeline.predict(X_test)
assert_allclose(
model_normalize.coef_ * pipeline[0].scale_, pipeline[1].coef_)
assert pipeline[1].intercept_ == pytest.approx(y_train.mean())
assert (model_normalize.intercept_ ==
pytest.approx(y_train.mean() -
model_normalize.coef_.dot(X_train.mean(0))))
assert_allclose(y_pred_normalize, y_pred_standardize)
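

def _normalize_scale_identity_sketch():
    # A minimal illustrative sketch (not part of the original test suite) of
    # the identity behind the alpha rescaling above: for a centered column x,
    # ||x||_2 == x.std() * sqrt(len(x)), which is exactly the factor by which
    # normalize=True and StandardScaler preprocessing differ.
    rng = np.random.RandomState(0)
    x = rng.randn(100)
    x -= x.mean()
    assert np.isclose(np.linalg.norm(x), x.std() * np.sqrt(len(x)))
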
@pytest.mark.parametrize(
"LinearModel, params",
[(Lasso, {"tol": 1e-16, "alpha": 0.1}),
(LassoCV, {"tol": 1e-16}),
(ElasticNetCV, {}),
(RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
(ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.01}),
(ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.01}),
(Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
(LinearRegression, {}),
(RidgeCV, {})]
)
def test_model_pipeline_same_dense_and_sparse(LinearModel, params):
    # Test that a linear model preceded by StandardScaler in a pipeline and
    # with normalize set to False gives the same y_pred and the same .coef_
    # whether X is sparse or dense
model_dense = make_pipeline(
StandardScaler(with_mean=False),
LinearModel(normalize=False, **params)
)
model_sparse = make_pipeline(
StandardScaler(with_mean=False),
LinearModel(normalize=False, **params)
)
# prepare the data
rng = np.random.RandomState(0)
n_samples = 200
n_features = 2
X = rng.randn(n_samples, n_features)
X[X < 0.1] = 0.
X_sparse = sparse.csr_matrix(X)
y = rng.rand(n_samples)
if is_classifier(model_dense):
y = np.sign(y)
model_dense.fit(X, y)
model_sparse.fit(X_sparse, y)
assert_allclose(model_sparse[1].coef_, model_dense[1].coef_)
y_pred_dense = model_dense.predict(X)
y_pred_sparse = model_sparse.predict(X_sparse)
assert_allclose(y_pred_dense, y_pred_sparse)
assert_allclose(model_dense[1].intercept_, model_sparse[1].intercept_)
def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():
# Test that lasso_path with lars_path style output gives the
# same result
# Some toy data
X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T
y = np.array([1, 2, 3.1])
alphas = [5., 1., .5]
# Use lars_path and lasso_path(new output) with 1D linear interpolation
# to compute the same path
alphas_lars, _, coef_path_lars = lars_path(X, y, method='lasso')
coef_path_cont_lars = interpolate.interp1d(alphas_lars[::-1],
coef_path_lars[:, ::-1])
alphas_lasso2, coef_path_lasso2, _ = lasso_path(X, y, alphas=alphas,
return_models=False)
coef_path_cont_lasso = interpolate.interp1d(alphas_lasso2[::-1],
coef_path_lasso2[:, ::-1])
assert_array_almost_equal(
coef_path_cont_lasso(alphas), coef_path_cont_lars(alphas),
decimal=1)
def test_enet_path():
# We use a large number of samples and of informative features so that
# the l1_ratio selected is more toward ridge than lasso
X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
n_informative_features=100)
max_iter = 150
# Here we have a small number of iterations, and thus the
# ElasticNet might not converge. This is to speed up tests
clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
l1_ratio=[0.5, 0.7], cv=3,
max_iter=max_iter)
ignore_warnings(clf.fit)(X, y)
    # In this well-conditioned setting, we should have selected the
    # smallest penalty
assert_almost_equal(clf.alpha_, min(clf.alphas_))
# Non-sparse ground truth: we should have selected an elastic-net
# that is closer to ridge than to lasso
assert clf.l1_ratio_ == min(clf.l1_ratio)
clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
l1_ratio=[0.5, 0.7], cv=3,
max_iter=max_iter, precompute=True)
ignore_warnings(clf.fit)(X, y)
    # In this well-conditioned setting, we should have selected the
    # smallest penalty
assert_almost_equal(clf.alpha_, min(clf.alphas_))
# Non-sparse ground truth: we should have selected an elastic-net
# that is closer to ridge than to lasso
assert clf.l1_ratio_ == min(clf.l1_ratio)
# We are in well-conditioned settings with low noise: we should
# have a good test-set performance
assert clf.score(X_test, y_test) > 0.99
# Multi-output/target case
X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3)
clf = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
cv=3, max_iter=max_iter)
ignore_warnings(clf.fit)(X, y)
# We are in well-conditioned settings with low noise: we should
# have a good test-set performance
assert clf.score(X_test, y_test) > 0.99
assert clf.coef_.shape == (3, 10)
    # Mono-output y should give the same cross-validated alpha_ and
    # l1_ratio_ with ElasticNetCV and MultiTaskElasticNetCV.
X, y, _, _ = build_dataset(n_features=10)
clf1 = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
clf1.fit(X, y)
clf2 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
clf2.fit(X, y[:, np.newaxis])
assert_almost_equal(clf1.l1_ratio_, clf2.l1_ratio_)
assert_almost_equal(clf1.alpha_, clf2.alpha_)
def test_path_parameters():
X, y, _, _ = build_dataset()
max_iter = 100
clf = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=max_iter,
l1_ratio=0.5, tol=1e-3)
clf.fit(X, y) # new params
assert_almost_equal(0.5, clf.l1_ratio)
assert 50 == clf.n_alphas
assert 50 == len(clf.alphas_)
def test_warm_start():
X, y, _, _ = build_dataset()
clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True)
ignore_warnings(clf.fit)(X, y)
ignore_warnings(clf.fit)(X, y) # do a second round with 5 iterations
clf2 = ElasticNet(alpha=0.1, max_iter=10)
ignore_warnings(clf2.fit)(X, y)
assert_array_almost_equal(clf2.coef_, clf.coef_)
def test_lasso_alpha_warning():
X = [[-1], [0], [1]]
Y = [-1, 0, 1] # just a straight line
clf = Lasso(alpha=0)
assert_warns(UserWarning, clf.fit, X, Y)
def test_lasso_positive_constraint():
X = [[-1], [0], [1]]
y = [1, 0, -1] # just a straight line with negative slope
lasso = Lasso(alpha=0.1, max_iter=1000, positive=True)
lasso.fit(X, y)
assert min(lasso.coef_) >= 0
lasso = Lasso(alpha=0.1, max_iter=1000, precompute=True, positive=True)
lasso.fit(X, y)
assert min(lasso.coef_) >= 0
def test_enet_positive_constraint():
X = [[-1], [0], [1]]
y = [1, 0, -1] # just a straight line with negative slope
enet = ElasticNet(alpha=0.1, max_iter=1000, positive=True)
enet.fit(X, y)
assert min(enet.coef_) >= 0
def test_enet_cv_positive_constraint():
X, y, X_test, y_test = build_dataset()
max_iter = 500
# Ensure the unconstrained fit has a negative coefficient
enetcv_unconstrained = ElasticNetCV(n_alphas=3, eps=1e-1,
max_iter=max_iter,
cv=2, n_jobs=1)
enetcv_unconstrained.fit(X, y)
assert min(enetcv_unconstrained.coef_) < 0
# On same data, constrained fit has non-negative coefficients
enetcv_constrained = ElasticNetCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
cv=2, positive=True, n_jobs=1)
enetcv_constrained.fit(X, y)
assert min(enetcv_constrained.coef_) >= 0
def test_uniform_targets():
enet = ElasticNetCV(n_alphas=3)
m_enet = MultiTaskElasticNetCV(n_alphas=3)
lasso = LassoCV(n_alphas=3)
m_lasso = MultiTaskLassoCV(n_alphas=3)
models_single_task = (enet, lasso)
models_multi_task = (m_enet, m_lasso)
rng = np.random.RandomState(0)
X_train = rng.random_sample(size=(10, 3))
X_test = rng.random_sample(size=(10, 3))
y1 = np.empty(10)
y2 = np.empty((10, 2))
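    # For constant targets the data-fit term vanishes and alpha_max is ~0,
    # so the generated alpha grid collapses to np.finfo(float).resolution
    # (presumably the lower bound enforced by _alpha_grid).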
for model in models_single_task:
for y_values in (0, 5):
y1.fill(y_values)
assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)
for model in models_multi_task:
for y_values in (0, 5):
y2[:, 0].fill(y_values)
y2[:, 1].fill(2 * y_values)
assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)
def test_multi_task_lasso_and_enet():
X, y, X_test, y_test = build_dataset()
Y = np.c_[y, y]
# Y_test = np.c_[y_test, y_test]
clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
assert 0 < clf.dual_gap_ < 1e-5
assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y)
assert 0 < clf.dual_gap_ < 1e-5
assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit, X, Y)
def test_lasso_readonly_data():
X = np.array([[-1], [0], [1]])
Y = np.array([-1, 0, 1]) # just a straight line
T = np.array([[2], [3], [4]]) # test sample
with TempMemmap((X, Y)) as (X, Y):
clf = Lasso(alpha=0.5)
clf.fit(X, Y)
pred = clf.predict(T)
assert_array_almost_equal(clf.coef_, [.25])
assert_array_almost_equal(pred, [0.5, 0.75, 1.])
assert_almost_equal(clf.dual_gap_, 0)
def test_multi_task_lasso_readonly_data():
X, y, X_test, y_test = build_dataset()
Y = np.c_[y, y]
with TempMemmap((X, Y)) as (X, Y):
Y = np.c_[y, y]
clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
assert 0 < clf.dual_gap_ < 1e-5
assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
def test_enet_multitarget():
n_targets = 3
X, y, _, _ = build_dataset(n_samples=10, n_features=8,
n_informative_features=10, n_targets=n_targets)
estimator = ElasticNet(alpha=0.01)
estimator.fit(X, y)
coef, intercept, dual_gap = (estimator.coef_, estimator.intercept_,
estimator.dual_gap_)
for k in range(n_targets):
estimator.fit(X, y[:, k])
assert_array_almost_equal(coef[k, :], estimator.coef_)
assert_array_almost_equal(intercept[k], estimator.intercept_)
assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)
def test_multioutput_enetcv_error():
rng = np.random.RandomState(0)
X = rng.randn(10, 2)
y = rng.randn(10, 2)
clf = ElasticNetCV()
assert_raises(ValueError, clf.fit, X, y)
def test_multitask_enet_and_lasso_cv():
X, y, _, _ = build_dataset(n_features=50, n_targets=3)
clf = MultiTaskElasticNetCV(cv=3).fit(X, y)
assert_almost_equal(clf.alpha_, 0.00556, 3)
clf = MultiTaskLassoCV(cv=3).fit(X, y)
assert_almost_equal(clf.alpha_, 0.00278, 3)
X, y, _, _ = build_dataset(n_targets=3)
clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100,
l1_ratio=[0.3, 0.5], tol=1e-3, cv=3)
clf.fit(X, y)
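    # Expected shapes below: mse_path_ is (n_l1_ratios, n_alphas, n_folds)
    # = (2, 10, 3), and alphas_ holds one grid per l1_ratio, hence (2, 10).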
assert 0.5 == clf.l1_ratio_
assert (3, X.shape[1]) == clf.coef_.shape
assert (3, ) == clf.intercept_.shape
assert (2, 10, 3) == clf.mse_path_.shape
assert (2, 10) == clf.alphas_.shape
X, y, _, _ = build_dataset(n_targets=3)
clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3, cv=3)
clf.fit(X, y)
assert (3, X.shape[1]) == clf.coef_.shape
assert (3, ) == clf.intercept_.shape
assert (10, 3) == clf.mse_path_.shape
assert 10 == len(clf.alphas_)
def test_1d_multioutput_enet_and_multitask_enet_cv():
X, y, _, _ = build_dataset(n_features=10)
y = y[:, np.newaxis]
clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
clf.fit(X, y[:, 0])
clf1 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
clf1.fit(X, y)
assert_almost_equal(clf.l1_ratio_, clf1.l1_ratio_)
assert_almost_equal(clf.alpha_, clf1.alpha_)
assert_almost_equal(clf.coef_, clf1.coef_[0])
assert_almost_equal(clf.intercept_, clf1.intercept_[0])
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
X, y, _, _ = build_dataset(n_features=10)
y = y[:, np.newaxis]
clf = LassoCV(n_alphas=5, eps=2e-3)
clf.fit(X, y[:, 0])
clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
clf1.fit(X, y)
assert_almost_equal(clf.alpha_, clf1.alpha_)
assert_almost_equal(clf.coef_, clf1.coef_[0])
assert_almost_equal(clf.intercept_, clf1.intercept_[0])
def test_sparse_input_dtype_enet_and_lassocv():
X, y, _, _ = build_dataset(n_features=10)
clf = ElasticNetCV(n_alphas=5)
clf.fit(sparse.csr_matrix(X), y)
clf1 = ElasticNetCV(n_alphas=5)
clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)
clf = LassoCV(n_alphas=5)
clf.fit(sparse.csr_matrix(X), y)
clf1 = LassoCV(n_alphas=5)
clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)
def test_precompute_invalid_argument():
X, y, _, _ = build_dataset()
for clf in [ElasticNetCV(precompute="invalid"),
LassoCV(precompute="invalid")]:
assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
"array-like.*Got 'invalid'", clf.fit, X, y)
# Precompute = 'auto' is not supported for ElasticNet and Lasso
assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
"Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
"Got 'auto'", Lasso(precompute='auto').fit, X, y)
def test_warm_start_convergence():
X, y, _, _ = build_dataset()
model = ElasticNet(alpha=1e-3, tol=1e-3).fit(X, y)
n_iter_reference = model.n_iter_
# This dataset is not trivial enough for the model to converge in one pass.
assert n_iter_reference > 2
# Check that n_iter_ is invariant to multiple calls to fit
# when warm_start=False, all else being equal.
model.fit(X, y)
n_iter_cold_start = model.n_iter_
assert n_iter_cold_start == n_iter_reference
# Fit the same model again, using a warm start: the optimizer just performs
# a single pass before checking that it has already converged
model.set_params(warm_start=True)
model.fit(X, y)
n_iter_warm_start = model.n_iter_
assert n_iter_warm_start == 1
def test_warm_start_convergence_with_regularizer_decrement():
X, y = load_diabetes(return_X_y=True)
# Train a model to converge on a lightly regularized problem
final_alpha = 1e-5
low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y)
    # Fit a new model on a more regularized version of the same problem.
    # Fitting with high regularization is easier, so it should converge
    # faster in general.
high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
assert low_reg_model.n_iter_ > high_reg_model.n_iter_
    # Fit the original, less regularized version of the problem again, but
    # start from the solution of the highly regularized variant as a better
    # initialization. This should also converge faster than the original
    # model that starts from zero.
warm_low_reg_model = deepcopy(high_reg_model)
warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha)
warm_low_reg_model.fit(X, y)
assert low_reg_model.n_iter_ > warm_low_reg_model.n_iter_
def test_random_descent():
# Test that both random and cyclic selection give the same results.
# Ensure that the test models fully converge and check a wide
# range of conditions.
    # This uses the coordinate descent algorithm with the Gram trick.
X, y, _, _ = build_dataset(n_samples=50, n_features=20)
clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
clf_cyclic.fit(X, y)
clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
clf_random.fit(X, y)
assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)
    # This uses the coordinate descent algorithm without the Gram trick
clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
clf_cyclic.fit(X.T, y[:20])
clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
clf_random.fit(X.T, y[:20])
assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)
# Sparse Case
clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
clf_cyclic.fit(sparse.csr_matrix(X), y)
clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
clf_random.fit(sparse.csr_matrix(X), y)
assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)
# Multioutput case.
new_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
clf_cyclic = MultiTaskElasticNet(selection='cyclic', tol=1e-8)
clf_cyclic.fit(X, new_y)
clf_random = MultiTaskElasticNet(selection='random', tol=1e-8,
random_state=42)
clf_random.fit(X, new_y)
assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)
    # Raise an error when selection is neither 'cyclic' nor 'random'.
clf_random = ElasticNet(selection='invalid')
assert_raises(ValueError, clf_random.fit, X, y)
def test_enet_path_positive():
# Test positive parameter
X, Y, _, _ = build_dataset(n_samples=50, n_features=50, n_targets=2)
# For mono output
# Test that the coefs returned by positive=True in enet_path are positive
for path in [enet_path, lasso_path]:
pos_path_coef = path(X, Y[:, 0], positive=True)[1]
assert np.all(pos_path_coef >= 0)
    # For multi-output, the positive parameter is not allowed;
    # test that an error is raised
for path in [enet_path, lasso_path]:
assert_raises(ValueError, path, X, Y, positive=True)
def test_sparse_dense_descent_paths():
    # Test that dense and sparse input give the same output for descent paths.
X, y, _, _ = build_dataset(n_samples=50, n_features=20)
csr = sparse.csr_matrix(X)
for path in [enet_path, lasso_path]:
_, coefs, _ = path(X, y, fit_intercept=False)
_, sparse_coefs, _ = path(csr, y, fit_intercept=False)
assert_array_almost_equal(coefs, sparse_coefs)
def test_check_input_false():
X, y, _, _ = build_dataset(n_samples=20, n_features=10)
X = check_array(X, order='F', dtype='float64')
y = check_array(X, order='F', dtype='float64')
clf = ElasticNet(selection='cyclic', tol=1e-8)
# Check that no error is raised if data is provided in the right format
clf.fit(X, y, check_input=False)
# With check_input=False, an exhaustive check is not made on y but its
# dtype is still cast in _preprocess_data to X's dtype. So the test should
# pass anyway
X = check_array(X, order='F', dtype='float32')
clf.fit(X, y, check_input=False)
    # With no input checking, providing X in C order would result in
    # incorrect computation; such input is rejected with a ValueError
X = check_array(X, order='C', dtype='float64')
assert_raises(ValueError, clf.fit, X, y, check_input=False)
@pytest.mark.parametrize("check_input", [True, False])
def test_enet_copy_X_True(check_input):
X, y, _, _ = build_dataset()
X = X.copy(order='F')
original_X = X.copy()
enet = ElasticNet(copy_X=True)
enet.fit(X, y, check_input=check_input)
assert_array_equal(original_X, X)
def test_enet_copy_X_False_check_input_False():
X, y, _, _ = build_dataset()
X = X.copy(order='F')
original_X = X.copy()
enet = ElasticNet(copy_X=False)
enet.fit(X, y, check_input=False)
# No copying, X is overwritten
assert np.any(np.not_equal(original_X, X))
def test_overridden_gram_matrix():
X, y, _, _ = build_dataset(n_samples=20, n_features=10)
Gram = X.T.dot(X)
clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram)
assert_warns_message(UserWarning,
"Gram matrix was provided but X was centered"
" to fit intercept, "
"or X was normalized : recomputing Gram matrix.",
clf.fit, X, y)
@pytest.mark.parametrize('model', [ElasticNet, Lasso])
def test_lasso_non_float_y(model):
X = [[0, 0], [1, 1], [-1, -1]]
y = [0, 1, 2]
y_float = [0.0, 1.0, 2.0]
clf = model(fit_intercept=False)
clf.fit(X, y)
clf_float = model(fit_intercept=False)
clf_float.fit(X, y_float)
assert_array_equal(clf.coef_, clf_float.coef_)
def test_enet_float_precision():
# Generate dataset
X, y, X_test, y_test = build_dataset(n_samples=20, n_features=10)
# Here we have a small number of iterations, and thus the
# ElasticNet might not converge. This is to speed up tests
for normalize in [True, False]:
for fit_intercept in [True, False]:
coef = {}
intercept = {}
for dtype in [np.float64, np.float32]:
clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False,
fit_intercept=fit_intercept,
normalize=normalize)
X = dtype(X)
y = dtype(y)
ignore_warnings(clf.fit)(X, y)
coef[('simple', dtype)] = clf.coef_
intercept[('simple', dtype)] = clf.intercept_
assert clf.coef_.dtype == dtype
# test precompute Gram array
Gram = X.T.dot(X)
clf_precompute = ElasticNet(alpha=0.5, max_iter=100,
precompute=Gram,
fit_intercept=fit_intercept,
normalize=normalize)
ignore_warnings(clf_precompute.fit)(X, y)
assert_array_almost_equal(clf.coef_, clf_precompute.coef_)
assert_array_almost_equal(clf.intercept_,
clf_precompute.intercept_)
# test multi task enet
multi_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
clf_multioutput = MultiTaskElasticNet(
alpha=0.5, max_iter=100, fit_intercept=fit_intercept,
normalize=normalize)
clf_multioutput.fit(X, multi_y)
coef[('multi', dtype)] = clf_multioutput.coef_
intercept[('multi', dtype)] = clf_multioutput.intercept_
assert clf.coef_.dtype == dtype
for v in ['simple', 'multi']:
assert_array_almost_equal(coef[(v, np.float32)],
coef[(v, np.float64)],
decimal=4)
assert_array_almost_equal(intercept[(v, np.float32)],
intercept[(v, np.float64)],
decimal=4)
def test_enet_l1_ratio():
    # Test that an informative error is raised if an estimator that
    # uses _alpha_grid is called with l1_ratio=0
msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
"Please supply a grid by providing your estimator with the "
"appropriate `alphas=` argument.")
X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
y = np.array([12, 10, 11, 21, 5])
assert_raise_message(ValueError, msg, ElasticNetCV(
l1_ratio=0, random_state=42).fit, X, y)
assert_raise_message(ValueError, msg, MultiTaskElasticNetCV(
l1_ratio=0, random_state=42).fit, X, y[:, None])
# Test that l1_ratio=0 is allowed if we supply a grid manually
alphas = [0.1, 10]
estkwds = {'alphas': alphas, 'random_state': 42}
est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds)
est = ElasticNetCV(l1_ratio=0, **estkwds)
with ignore_warnings():
est_desired.fit(X, y)
est.fit(X, y)
assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds)
est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds)
with ignore_warnings():
est.fit(X, y[:, None])
est_desired.fit(X, y[:, None])
assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
def test_coef_shape_not_zero():
est_no_intercept = Lasso(fit_intercept=False)
est_no_intercept.fit(np.c_[np.ones(3)], np.ones(3))
assert est_no_intercept.coef_.shape == (1,)
def test_warm_start_multitask_lasso():
X, y, X_test, y_test = build_dataset()
Y = np.c_[y, y]
clf = MultiTaskLasso(alpha=0.1, max_iter=5, warm_start=True)
ignore_warnings(clf.fit)(X, Y)
ignore_warnings(clf.fit)(X, Y) # do a second round with 5 iterations
clf2 = MultiTaskLasso(alpha=0.1, max_iter=10)
ignore_warnings(clf2.fit)(X, Y)
assert_array_almost_equal(clf2.coef_, clf.coef_)
@pytest.mark.parametrize('klass, n_classes, kwargs',
[(Lasso, 1, dict(precompute=True)),
(Lasso, 1, dict(precompute=False)),
(MultiTaskLasso, 2, dict()),
(MultiTaskLasso, 2, dict())])
def test_enet_coordinate_descent(klass, n_classes, kwargs):
"""Test that a warning is issued if model does not converge"""
clf = klass(max_iter=2, **kwargs)
n_samples = 5
n_features = 2
X = np.ones((n_samples, n_features)) * 1e50
y = np.ones((n_samples, n_classes))
if klass == Lasso:
y = y.ravel()
assert_warns(ConvergenceWarning, clf.fit, X, y)
def test_convergence_warnings():
random_state = np.random.RandomState(0)
X = random_state.standard_normal((1000, 500))
y = random_state.standard_normal((1000, 3))
    # check that the model fails to converge: the duality gap is
    # non-negative, so it can never reach tol=-1
with pytest.warns(ConvergenceWarning):
MultiTaskElasticNet(max_iter=1, tol=-1).fit(X, y)
# check that the model converges w/o warnings
with pytest.warns(None) as record:
MultiTaskElasticNet(max_iter=1000).fit(X, y)
assert not record.list
def test_sparse_input_convergence_warning():
X, y, _, _ = build_dataset(n_samples=1000, n_features=500)
with pytest.warns(ConvergenceWarning):
ElasticNet(max_iter=1, tol=0).fit(
sparse.csr_matrix(X, dtype=np.float32), y)
# check that the model converges w/o warnings
with pytest.warns(None) as record:
Lasso(max_iter=1000).fit(sparse.csr_matrix(X, dtype=np.float32), y)
assert not record.list
@pytest.mark.parametrize("precompute, inner_precompute", [
(True, True),
('auto', False),
(False, False),
])
def test_lassoCV_does_not_set_precompute(monkeypatch, precompute,
inner_precompute):
X, y, _, _ = build_dataset()
calls = 0
class LassoMock(Lasso):
def fit(self, X, y):
super().fit(X, y)
nonlocal calls
calls += 1
assert self.precompute == inner_precompute
monkeypatch.setattr("sklearn.linear_model._coordinate_descent.Lasso",
LassoMock)
clf = LassoCV(precompute=precompute)
clf.fit(X, y)
assert calls > 0
def test_multi_task_lasso_cv_dtype():
n_samples, n_features = 10, 3
rng = np.random.RandomState(42)
X = rng.binomial(1, .5, size=(n_samples, n_features))
X = X.astype(int) # make it explicit that X is int
y = X[:, [0, 0]].copy()
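    # Both tasks share the same target (the first column of X, duplicated),
    # so the recovered coefficients should be [1, 0, 0] for each task.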
est = MultiTaskLassoCV(n_alphas=5, fit_intercept=True).fit(X, y)
assert_array_almost_equal(est.coef_, [[1, 0, 0]] * 2, decimal=3)
@pytest.mark.parametrize('fit_intercept', [True, False])
@pytest.mark.parametrize('alpha', [0.01])
@pytest.mark.parametrize('normalize', [False, True])
@pytest.mark.parametrize('precompute', [False, True])
def test_enet_sample_weight_consistency(fit_intercept, alpha, normalize,
precompute):
"""Test that the impact of sample_weight is consistent."""
rng = np.random.RandomState(0)
n_samples, n_features = 10, 5
X = rng.rand(n_samples, n_features)
y = rng.rand(n_samples)
params = dict(alpha=alpha, fit_intercept=fit_intercept,
precompute=precompute, tol=1e-6, l1_ratio=0.5)
reg = ElasticNet(**params).fit(X, y)
coef = reg.coef_.copy()
if fit_intercept:
intercept = reg.intercept_
# sample_weight=np.ones(..) should be equivalent to sample_weight=None
sample_weight = np.ones_like(y)
reg.fit(X, y, sample_weight=sample_weight)
assert_allclose(reg.coef_, coef, rtol=1e-6)
if fit_intercept:
assert_allclose(reg.intercept_, intercept)
    # sample_weight=None should be equivalent to a constant sample_weight
sample_weight = 123.
reg.fit(X, y, sample_weight=sample_weight)
assert_allclose(reg.coef_, coef, rtol=1e-6)
if fit_intercept:
assert_allclose(reg.intercept_, intercept)
# scaling of sample_weight should have no effect, cf. np.average()
sample_weight = 2 * np.ones_like(y)
reg.fit(X, y, sample_weight=sample_weight)
assert_allclose(reg.coef_, coef, rtol=1e-6)
if fit_intercept:
assert_allclose(reg.intercept_, intercept)
# setting one element of sample_weight to 0 is equivalent to removing
# the corresponding sample
sample_weight = np.ones_like(y)
sample_weight[-1] = 0
reg.fit(X, y, sample_weight=sample_weight)
coef1 = reg.coef_.copy()
if fit_intercept:
intercept1 = reg.intercept_
reg.fit(X[:-1], y[:-1])
assert_allclose(reg.coef_, coef1, rtol=1e-6)
if fit_intercept:
assert_allclose(reg.intercept_, intercept1)
# check that multiplying sample_weight by 2 is equivalent
# to repeating corresponding samples twice
if sparse.issparse(X):
X = X.toarray()
X2 = np.concatenate([X, X[:n_samples//2]], axis=0)
y2 = np.concatenate([y, y[:n_samples//2]])
sample_weight_1 = np.ones(len(y))
sample_weight_1[:n_samples//2] = 2
reg1 = ElasticNet(**params).fit(
X, y, sample_weight=sample_weight_1
)
reg2 = ElasticNet(**params).fit(
X2, y2, sample_weight=None
)
assert_allclose(reg1.coef_, reg2.coef_)
def test_enet_sample_weight_sparse():
reg = ElasticNet()
X = sparse.csc_matrix(np.zeros((3, 2)))
y = np.array([-1, 0, 1])
sw = np.array([1, 2, 3])
with pytest.raises(ValueError, match="Sample weights do not.*support "
"sparse matrices"):
reg.fit(X, y, sample_weight=sw, check_input=True)
@pytest.mark.parametrize("backend", ["loky", "threading"])
@pytest.mark.parametrize("estimator",
[ElasticNetCV, MultiTaskElasticNetCV,
LassoCV, MultiTaskLassoCV])
def test_linear_models_cv_fit_for_all_backends(backend, estimator):
# LinearModelsCV.fit performs inplace operations on input data which is
# memmapped when using loky backend, causing an error due to unexpected
# behavior of fancy indexing of read-only memmaps (cf. numpy#14132).
if (parse_version(joblib.__version__) < parse_version('0.12')
and backend == 'loky'):
pytest.skip('loky backend does not exist in joblib <0.12')
# Create a problem sufficiently large to cause memmapping (1MB).
n_targets = 1 + (estimator in (MultiTaskElasticNetCV, MultiTaskLassoCV))
X, y = make_regression(20000, 10, n_targets=n_targets)
with joblib.parallel_backend(backend=backend):
estimator(n_jobs=2, cv=3).fit(X, y)