projektAI/venv/Lib/site-packages/sklearn/linear_model/tests/test_coordinate_descent.py

# Authors: Olivier Grisel <olivier.grisel@ensta.org>
#          Alexandre Gramfort <alexandre.gramfort@inria.fr>
# License: BSD 3 clause

import numpy as np
import pytest
from scipy import interpolate, sparse
from copy import deepcopy
import joblib

from sklearn.base import is_classifier
from sklearn.datasets import load_diabetes
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.exceptions import ConvergenceWarning
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import assert_raises_regex
from sklearn.utils._testing import assert_raise_message
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import assert_warns_message
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import TempMemmap
from sklearn.utils.fixes import parse_version

from sklearn.linear_model import (
    ARDRegression,
    BayesianRidge,
    ElasticNet,
    ElasticNetCV,
    enet_path,
    Lars,
    lars_path,
    Lasso,
    LassoCV,
    LassoLars,
    LassoLarsCV,
    LassoLarsIC,
    lasso_path,
    LinearRegression,
    MultiTaskElasticNet,
    MultiTaskElasticNetCV,
    MultiTaskLasso,
    MultiTaskLassoCV,
    OrthogonalMatchingPursuit,
    Ridge,
    RidgeClassifier,
    RidgeCV,
)

from sklearn.linear_model._coordinate_descent import _set_order
from sklearn.utils import check_array


@pytest.mark.parametrize('l1_ratio', (-1, 2, None, 10, 'something_wrong'))
def test_l1_ratio_param_invalid(l1_ratio):
    # Check that correct error is raised when l1_ratio in ElasticNet
    # is outside the correct range
    X = np.array([[-1.], [0.], [1.]])
    Y = [-1, 0, 1]       # just a straight line

    msg = "l1_ratio must be between 0 and 1; got l1_ratio="
    clf = ElasticNet(alpha=0.1, l1_ratio=l1_ratio)
    with pytest.raises(ValueError, match=msg):
        clf.fit(X, Y)


@pytest.mark.parametrize('order', ['C', 'F'])
@pytest.mark.parametrize('input_order', ['C', 'F'])
def test_set_order_dense(order, input_order):
    """Check that _set_order returns arrays with promised order."""
    X = np.array([[0], [0], [0]], order=input_order)
    y = np.array([0, 0, 0], order=input_order)
    X2, y2 = _set_order(X, y, order=order)
    if order == 'C':
        assert X2.flags['C_CONTIGUOUS']
        assert y2.flags['C_CONTIGUOUS']
    elif order == 'F':
        assert X2.flags['F_CONTIGUOUS']
        assert y2.flags['F_CONTIGUOUS']

    if order == input_order:
        assert X is X2
        assert y is y2


@pytest.mark.parametrize('order', ['C', 'F'])
@pytest.mark.parametrize('input_order', ['C', 'F'])
def test_set_order_sparse(order, input_order):
    """Check that _set_order returns sparse matrices in promised format."""
    X = sparse.coo_matrix(np.array([[0], [0], [0]]))
    y = sparse.coo_matrix(np.array([0, 0, 0]))
    sparse_format = "csc" if input_order == "F" else "csr"
    X = X.asformat(sparse_format)
    y = X.asformat(sparse_format)
    X2, y2 = _set_order(X, y, order=order)
    if order == 'C':
        assert sparse.isspmatrix_csr(X2)
        assert sparse.isspmatrix_csr(y2)
    elif order == 'F':
        assert sparse.isspmatrix_csc(X2)
        assert sparse.isspmatrix_csc(y2)


def test_lasso_zero():
    # Check that the lasso can handle zero data without crashing
    X = [[0], [0], [0]]
    y = [0, 0, 0]
    clf = Lasso(alpha=0.1).fit(X, y)
    pred = clf.predict([[1], [2], [3]])
    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(pred, [0, 0, 0])
    assert_almost_equal(clf.dual_gap_, 0)


def test_lasso_toy():
    # Test Lasso on a toy example for various values of alpha.
    # When validating this against glmnet notice that glmnet divides it
    # against nobs.

    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]       # just a straight line
    T = [[2], [3], [4]]  # test sample

    clf = Lasso(alpha=1e-8)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = Lasso(alpha=0.1)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [.85])
    assert_array_almost_equal(pred, [1.7, 2.55, 3.4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = Lasso(alpha=0.5)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [.25])
    assert_array_almost_equal(pred, [0.5, 0.75, 1.])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = Lasso(alpha=1)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [.0])
    assert_array_almost_equal(pred, [0, 0, 0])
    assert_almost_equal(clf.dual_gap_, 0)


def test_enet_toy():
    # Test ElasticNet for various parameters of alpha and l1_ratio.
    # Actually, the parameters alpha = 0 should not be allowed. However,
    # we test it as a border case.
    # ElasticNet is tested with and without precomputed Gram matrix

    X = np.array([[-1.], [0.], [1.]])
    Y = [-1, 0, 1]       # just a straight line
    T = [[2.], [3.], [4.]]  # test sample

    # this should be the same as lasso
    clf = ElasticNet(alpha=1e-8, l1_ratio=1.0)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=100,
                     precompute=False)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf.set_params(max_iter=100, precompute=True)
    clf.fit(X, Y)  # with Gram
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf.set_params(max_iter=100, precompute=np.dot(X.T, X))
    clf.fit(X, Y)  # with Gram
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.45454], 3)
    assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
    assert_almost_equal(clf.dual_gap_, 0)


def build_dataset(n_samples=50, n_features=200, n_informative_features=10,
                  n_targets=1):
    """
    build an ill-posed linear regression problem with many noisy features and
    comparatively few samples
    """
    random_state = np.random.RandomState(0)
    if n_targets > 1:
        w = random_state.randn(n_features, n_targets)
    else:
        w = random_state.randn(n_features)
    w[n_informative_features:] = 0.0
    X = random_state.randn(n_samples, n_features)
    y = np.dot(X, w)
    X_test = random_state.randn(n_samples, n_features)
    y_test = np.dot(X_test, w)
    return X, y, X_test, y_test


def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, cv=3).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True,
                  cv=3)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30, cv=3).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert np.abs(np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
                  np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert clf.score(X_test, y_test) > 0.99


def test_lasso_cv_with_some_model_selection():
    from sklearn.model_selection import ShuffleSplit
    from sklearn import datasets

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=ShuffleSplit(random_state=0))
    )
    pipe.fit(X, y)


def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert min(clf_unconstrained.coef_) < 0

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert min(clf_constrained.coef_) >= 0


@pytest.mark.parametrize(
    "LinearModel, params",
    [(Lasso, {"tol": 1e-16, "alpha": 0.1}),
     (LassoLars, {"alpha": 0.1}),
     (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
     (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
     (BayesianRidge, {}),
     (ARDRegression, {}),
     (OrthogonalMatchingPursuit, {}),
     (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
     (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
     (MultiTaskLasso, {"tol": 1e-16, "alpha": 0.1}),
     (Lars, {}),
     (LinearRegression, {}),
     (LassoLarsIC, {})]
)
def test_model_pipeline_same_as_normalize_true(LinearModel, params):
    # Test that linear models (LinearModel) set with normalize set to True are
    # doing the same as the same linear model preceeded by StandardScaler
    # in the pipeline and with normalize set to False

    # normalize is True
    model_name = LinearModel.__name__
    model_normalize = LinearModel(normalize=True, fit_intercept=True, **params)

    pipeline = make_pipeline(
        StandardScaler(),
        LinearModel(normalize=False, fit_intercept=True, **params)
    )

    is_multitask = model_normalize._get_tags()["multioutput_only"]

    # prepare the data
    n_samples, n_features = 100, 2
    rng = np.random.RandomState(0)
    w = rng.randn(n_features)
    X = rng.randn(n_samples, n_features)
    X += 20  # make features non-zero mean
    y = X.dot(w)

    # make classes out of regression
    if is_classifier(model_normalize):
        y[y > np.mean(y)] = -1
        y[y > 0] = 1
    if is_multitask:
        y = np.stack((y, y), axis=1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    if 'alpha' in params:
        model_normalize.set_params(alpha=params['alpha'])
        if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
            new_params = dict(
                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
        if model_name in ['Ridge', 'RidgeClassifier']:
            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
    if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
        if params['l1_ratio'] == 1:
            new_params = dict(
                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
        if params['l1_ratio'] == 0:
            new_params = dict(alpha=params['alpha'] * X_train.shape[0])

    if 'new_params' in locals():
        pipeline[1].set_params(**new_params)

    model_normalize.fit(X_train, y_train)
    y_pred_normalize = model_normalize.predict(X_test)

    pipeline.fit(X_train, y_train)
    y_pred_standardize = pipeline.predict(X_test)

    assert_allclose(
        model_normalize.coef_ * pipeline[0].scale_, pipeline[1].coef_)
    assert pipeline[1].intercept_ == pytest.approx(y_train.mean())
    assert (model_normalize.intercept_ ==
            pytest.approx(y_train.mean() -
                          model_normalize.coef_.dot(X_train.mean(0))))
    assert_allclose(y_pred_normalize, y_pred_standardize)


@pytest.mark.parametrize(
    "LinearModel, params",
    [(Lasso, {"tol": 1e-16, "alpha": 0.1}),
     (LassoCV, {"tol": 1e-16}),
     (ElasticNetCV, {}),
     (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.01}),
     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.01}),
     (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
     (LinearRegression, {}),
     (RidgeCV, {})]
 )
def test_model_pipeline_same_dense_and_sparse(LinearModel, params):
    # Test that linear model preceeded by StandardScaler in the pipeline and
    # with normalize set to False gives the same y_pred and the same .coef_
    # given X sparse or dense

    model_dense = make_pipeline(
        StandardScaler(with_mean=False),
        LinearModel(normalize=False, **params)
    )

    model_sparse = make_pipeline(
        StandardScaler(with_mean=False),
        LinearModel(normalize=False, **params)
    )

    # prepare the data
    rng = np.random.RandomState(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.

    X_sparse = sparse.csr_matrix(X)
    y = rng.rand(n_samples)

    if is_classifier(model_dense):
        y = np.sign(y)

    model_dense.fit(X, y)
    model_sparse.fit(X_sparse, y)

    assert_allclose(model_sparse[1].coef_, model_dense[1].coef_)
    y_pred_dense = model_dense.predict(X)
    y_pred_sparse = model_sparse.predict(X_sparse)
    assert_allclose(y_pred_dense, y_pred_sparse)

    assert_allclose(model_dense[1].intercept_, model_sparse[1].intercept_)


def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():
    # Test that lasso_path with lars_path style output gives the
    # same result

    # Some toy data
    X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T
    y = np.array([1, 2, 3.1])
    alphas = [5., 1., .5]

    # Use lars_path and lasso_path(new output) with 1D linear interpolation
    # to compute the same path
    alphas_lars, _, coef_path_lars = lars_path(X, y, method='lasso')
    coef_path_cont_lars = interpolate.interp1d(alphas_lars[::-1],
                                               coef_path_lars[:, ::-1])
    alphas_lasso2, coef_path_lasso2, _ = lasso_path(X, y, alphas=alphas,
                                                    return_models=False)
    coef_path_cont_lasso = interpolate.interp1d(alphas_lasso2[::-1],
                                                coef_path_lasso2[:, ::-1])

    assert_array_almost_equal(
        coef_path_cont_lasso(alphas), coef_path_cont_lars(alphas),
        decimal=1)


def test_enet_path():
    # We use a large number of samples and of informative features so that
    # the l1_ratio selected is more toward ridge than lasso
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    max_iter = 150

    # Here we have a small number of iterations, and thus the
    # ElasticNet might not converge. This is to speed up tests
    clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                       l1_ratio=[0.5, 0.7], cv=3,
                       max_iter=max_iter)
    ignore_warnings(clf.fit)(X, y)
    # Well-conditioned settings, we should have selected our
    # smallest penalty
    assert_almost_equal(clf.alpha_, min(clf.alphas_))
    # Non-sparse ground truth: we should have selected an elastic-net
    # that is closer to ridge than to lasso
    assert clf.l1_ratio_ == min(clf.l1_ratio)

    clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                       l1_ratio=[0.5, 0.7], cv=3,
                       max_iter=max_iter, precompute=True)
    ignore_warnings(clf.fit)(X, y)

    # Well-conditioned settings, we should have selected our
    # smallest penalty
    assert_almost_equal(clf.alpha_, min(clf.alphas_))
    # Non-sparse ground truth: we should have selected an elastic-net
    # that is closer to ridge than to lasso
    assert clf.l1_ratio_ == min(clf.l1_ratio)

    # We are in well-conditioned settings with low noise: we should
    # have a good test-set performance
    assert clf.score(X_test, y_test) > 0.99

    # Multi-output/target case
    X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3)
    clf = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
                                cv=3, max_iter=max_iter)
    ignore_warnings(clf.fit)(X, y)
    # We are in well-conditioned settings with low noise: we should
    # have a good test-set performance
    assert clf.score(X_test, y_test) > 0.99
    assert clf.coef_.shape == (3, 10)

    # Mono-output should have same cross-validated alpha_ and l1_ratio_
    # in both cases.
    X, y, _, _ = build_dataset(n_features=10)
    clf1 = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf1.fit(X, y)
    clf2 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf2.fit(X, y[:, np.newaxis])
    assert_almost_equal(clf1.l1_ratio_, clf2.l1_ratio_)
    assert_almost_equal(clf1.alpha_, clf2.alpha_)


def test_path_parameters():
    X, y, _, _ = build_dataset()
    max_iter = 100

    clf = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=max_iter,
                       l1_ratio=0.5, tol=1e-3)
    clf.fit(X, y)  # new params
    assert_almost_equal(0.5, clf.l1_ratio)
    assert 50 == clf.n_alphas
    assert 50 == len(clf.alphas_)


def test_warm_start():
    X, y, _, _ = build_dataset()
    clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(clf.fit)(X, y)
    ignore_warnings(clf.fit)(X, y)  # do a second round with 5 iterations

    clf2 = ElasticNet(alpha=0.1, max_iter=10)
    ignore_warnings(clf2.fit)(X, y)
    assert_array_almost_equal(clf2.coef_, clf.coef_)


def test_lasso_alpha_warning():
    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]       # just a straight line

    clf = Lasso(alpha=0)
    assert_warns(UserWarning, clf.fit, X, Y)


def test_lasso_positive_constraint():
    X = [[-1], [0], [1]]
    y = [1, 0, -1]       # just a straight line with negative slope

    lasso = Lasso(alpha=0.1, max_iter=1000, positive=True)
    lasso.fit(X, y)
    assert min(lasso.coef_) >= 0

    lasso = Lasso(alpha=0.1, max_iter=1000, precompute=True, positive=True)
    lasso.fit(X, y)
    assert min(lasso.coef_) >= 0


def test_enet_positive_constraint():
    X = [[-1], [0], [1]]
    y = [1, 0, -1]       # just a straight line with negative slope

    enet = ElasticNet(alpha=0.1, max_iter=1000, positive=True)
    enet.fit(X, y)
    assert min(enet.coef_) >= 0


def test_enet_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    enetcv_unconstrained = ElasticNetCV(n_alphas=3, eps=1e-1,
                                        max_iter=max_iter,
                                        cv=2, n_jobs=1)
    enetcv_unconstrained.fit(X, y)
    assert min(enetcv_unconstrained.coef_) < 0

    # On same data, constrained fit has non-negative coefficients
    enetcv_constrained = ElasticNetCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                                      cv=2, positive=True, n_jobs=1)
    enetcv_constrained.fit(X, y)
    assert min(enetcv_constrained.coef_) >= 0


def test_uniform_targets():
    enet = ElasticNetCV(n_alphas=3)
    m_enet = MultiTaskElasticNetCV(n_alphas=3)
    lasso = LassoCV(n_alphas=3)
    m_lasso = MultiTaskLassoCV(n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)


def test_multi_task_lasso_and_enet():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    # Y_test = np.c_[y_test, y_test]
    clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
    assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit, X, Y)


def test_lasso_readonly_data():
    X = np.array([[-1], [0], [1]])
    Y = np.array([-1, 0, 1])   # just a straight line
    T = np.array([[2], [3], [4]])  # test sample
    with TempMemmap((X, Y)) as (X, Y):
        clf = Lasso(alpha=0.5)
        clf.fit(X, Y)
        pred = clf.predict(T)
        assert_array_almost_equal(clf.coef_, [.25])
        assert_array_almost_equal(pred, [0.5, 0.75, 1.])
        assert_almost_equal(clf.dual_gap_, 0)


def test_multi_task_lasso_readonly_data():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    with TempMemmap((X, Y)) as (X, Y):
        Y = np.c_[y, y]
        clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
        assert 0 < clf.dual_gap_ < 1e-5
        assert_array_almost_equal(clf.coef_[0], clf.coef_[1])


def test_enet_multitarget():
    n_targets = 3
    X, y, _, _ = build_dataset(n_samples=10, n_features=8,
                               n_informative_features=10, n_targets=n_targets)
    estimator = ElasticNet(alpha=0.01)
    estimator.fit(X, y)
    coef, intercept, dual_gap = (estimator.coef_, estimator.intercept_,
                                 estimator.dual_gap_)

    for k in range(n_targets):
        estimator.fit(X, y[:, k])
        assert_array_almost_equal(coef[k, :], estimator.coef_)
        assert_array_almost_equal(intercept[k], estimator.intercept_)
        assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)


def test_multioutput_enetcv_error():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    y = rng.randn(10, 2)
    clf = ElasticNetCV()
    assert_raises(ValueError, clf.fit, X, y)


def test_multitask_enet_and_lasso_cv():
    X, y, _, _ = build_dataset(n_features=50, n_targets=3)
    clf = MultiTaskElasticNetCV(cv=3).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.00556, 3)
    clf = MultiTaskLassoCV(cv=3).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.00278, 3)

    X, y, _, _ = build_dataset(n_targets=3)
    clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100,
                                l1_ratio=[0.3, 0.5], tol=1e-3, cv=3)
    clf.fit(X, y)
    assert 0.5 == clf.l1_ratio_
    assert (3, X.shape[1]) == clf.coef_.shape
    assert (3, ) == clf.intercept_.shape
    assert (2, 10, 3) == clf.mse_path_.shape
    assert (2, 10) == clf.alphas_.shape

    X, y, _, _ = build_dataset(n_targets=3)
    clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3, cv=3)
    clf.fit(X, y)
    assert (3, X.shape[1]) == clf.coef_.shape
    assert (3, ) == clf.intercept_.shape
    assert (10, 3) == clf.mse_path_.shape
    assert 10 == len(clf.alphas_)


def test_1d_multioutput_enet_and_multitask_enet_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf1.fit(X, y)
    assert_almost_equal(clf.l1_ratio_, clf1.l1_ratio_)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])


def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])


def test_sparse_input_dtype_enet_and_lassocv():
    X, y, _, _ = build_dataset(n_features=10)
    clf = ElasticNetCV(n_alphas=5)
    clf.fit(sparse.csr_matrix(X), y)
    clf1 = ElasticNetCV(n_alphas=5)
    clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
    assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
    assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)

    clf = LassoCV(n_alphas=5)
    clf.fit(sparse.csr_matrix(X), y)
    clf1 = LassoCV(n_alphas=5)
    clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
    assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
    assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)


def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet and Lasso
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", Lasso(precompute='auto').fit, X, y)


def test_warm_start_convergence():
    X, y, _, _ = build_dataset()
    model = ElasticNet(alpha=1e-3, tol=1e-3).fit(X, y)
    n_iter_reference = model.n_iter_

    # This dataset is not trivial enough for the model to converge in one pass.
    assert n_iter_reference > 2

    # Check that n_iter_ is invariant to multiple calls to fit
    # when warm_start=False, all else being equal.
    model.fit(X, y)
    n_iter_cold_start = model.n_iter_
    assert n_iter_cold_start == n_iter_reference

    # Fit the same model again, using a warm start: the optimizer just performs
    # a single pass before checking that it has already converged
    model.set_params(warm_start=True)
    model.fit(X, y)
    n_iter_warm_start = model.n_iter_
    assert n_iter_warm_start == 1


def test_warm_start_convergence_with_regularizer_decrement():
    X, y = load_diabetes(return_X_y=True)

    # Train a model to converge on a lightly regularized problem
    final_alpha = 1e-5
    low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y)

    # Fitting a new model on a more regularized version of the same problem.
    # Fitting with high regularization is easier it should converge faster
    # in general.
    high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
    assert low_reg_model.n_iter_ > high_reg_model.n_iter_

    # Fit the solution to the original, less regularized version of the
    # problem but from the solution of the highly regularized variant of
    # the problem as a better starting point. This should also converge
    # faster than the original model that starts from zero.
    warm_low_reg_model = deepcopy(high_reg_model)
    warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha)
    warm_low_reg_model.fit(X, y)
    assert low_reg_model.n_iter_ > warm_low_reg_model.n_iter_


def test_random_descent():
    # Test that both random and cyclic selection give the same results.
    # Ensure that the test models fully converge and check a wide
    # range of conditions.

    # This uses the coordinate descent algo using the gram trick.
    X, y, _, _ = build_dataset(n_samples=50, n_features=20)
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X, y)
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(X, y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # This uses the descent algo without the gram trick
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X.T, y[:20])
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(X.T, y[:20])
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Sparse Case
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(sparse.csr_matrix(X), y)
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(sparse.csr_matrix(X), y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Multioutput case.
    new_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
    clf_cyclic = MultiTaskElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X, new_y)
    clf_random = MultiTaskElasticNet(selection='random', tol=1e-8,
                                     random_state=42)
    clf_random.fit(X, new_y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Raise error when selection is not in cyclic or random.
    clf_random = ElasticNet(selection='invalid')
    assert_raises(ValueError, clf_random.fit, X, y)


def test_enet_path_positive():
    # Test positive parameter

    X, Y, _, _ = build_dataset(n_samples=50, n_features=50, n_targets=2)

    # For mono output
    # Test that the coefs returned by positive=True in enet_path are positive
    for path in [enet_path, lasso_path]:
        pos_path_coef = path(X, Y[:, 0], positive=True)[1]
        assert np.all(pos_path_coef >= 0)

    # For multi output, positive parameter is not allowed
    # Test that an error is raised
    for path in [enet_path, lasso_path]:
        assert_raises(ValueError, path, X, Y, positive=True)


def test_sparse_dense_descent_paths():
    # Test that dense and sparse input give the same input for descent paths.
    X, y, _, _ = build_dataset(n_samples=50, n_features=20)
    csr = sparse.csr_matrix(X)
    for path in [enet_path, lasso_path]:
        _, coefs, _ = path(X, y, fit_intercept=False)
        _, sparse_coefs, _ = path(csr, y, fit_intercept=False)
        assert_array_almost_equal(coefs, sparse_coefs)


def test_check_input_false():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    y = check_array(X, order='F', dtype='float64')
    clf = ElasticNet(selection='cyclic', tol=1e-8)
    # Check that no error is raised if data is provided in the right format
    clf.fit(X, y, check_input=False)
    # With check_input=False, an exhaustive check is not made on y but its
    # dtype is still cast in _preprocess_data to X's dtype. So the test should
    # pass anyway
    X = check_array(X, order='F', dtype='float32')
    clf.fit(X, y, check_input=False)
    # With no input checking, providing X in C order should result in false
    # computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, clf.fit, X, y, check_input=False)


@pytest.mark.parametrize("check_input", [True, False])
def test_enet_copy_X_True(check_input):
    X, y, _, _ = build_dataset()
    X = X.copy(order='F')

    original_X = X.copy()
    enet = ElasticNet(copy_X=True)
    enet.fit(X, y, check_input=check_input)

    assert_array_equal(original_X, X)


def test_enet_copy_X_False_check_input_False():
    X, y, _, _ = build_dataset()
    X = X.copy(order='F')

    original_X = X.copy()
    enet = ElasticNet(copy_X=False)
    enet.fit(X, y, check_input=False)

    # No copying, X is overwritten
    assert np.any(np.not_equal(original_X, X))


def test_overrided_gram_matrix():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    Gram = X.T.dot(X)
    clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram)
    assert_warns_message(UserWarning,
                         "Gram matrix was provided but X was centered"
                         " to fit intercept, "
                         "or X was normalized : recomputing Gram matrix.",
                         clf.fit, X, y)


@pytest.mark.parametrize('model', [ElasticNet, Lasso])
def test_lasso_non_float_y(model):
    X = [[0, 0], [1, 1], [-1, -1]]
    y = [0, 1, 2]
    y_float = [0.0, 1.0, 2.0]

    clf = model(fit_intercept=False)
    clf.fit(X, y)
    clf_float = model(fit_intercept=False)
    clf_float.fit(X, y_float)
    assert_array_equal(clf.coef_, clf_float.coef_)


def test_enet_float_precision():
    # Generate dataset
    X, y, X_test, y_test = build_dataset(n_samples=20, n_features=10)
    # Here we have a small number of iterations, and thus the
    # ElasticNet might not converge. This is to speed up tests

    for normalize in [True, False]:
        for fit_intercept in [True, False]:
            coef = {}
            intercept = {}
            for dtype in [np.float64, np.float32]:
                clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False,
                                 fit_intercept=fit_intercept,
                                 normalize=normalize)

                X = dtype(X)
                y = dtype(y)
                ignore_warnings(clf.fit)(X, y)

                coef[('simple', dtype)] = clf.coef_
                intercept[('simple', dtype)] = clf.intercept_

                assert clf.coef_.dtype == dtype

                # test precompute Gram array
                Gram = X.T.dot(X)
                clf_precompute = ElasticNet(alpha=0.5, max_iter=100,
                                            precompute=Gram,
                                            fit_intercept=fit_intercept,
                                            normalize=normalize)
                ignore_warnings(clf_precompute.fit)(X, y)
                assert_array_almost_equal(clf.coef_, clf_precompute.coef_)
                assert_array_almost_equal(clf.intercept_,
                                          clf_precompute.intercept_)

                # test multi task enet
                multi_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
                clf_multioutput = MultiTaskElasticNet(
                    alpha=0.5, max_iter=100, fit_intercept=fit_intercept,
                    normalize=normalize)
                clf_multioutput.fit(X, multi_y)
                coef[('multi', dtype)] = clf_multioutput.coef_
                intercept[('multi', dtype)] = clf_multioutput.intercept_
                assert clf.coef_.dtype == dtype

            for v in ['simple', 'multi']:
                assert_array_almost_equal(coef[(v, np.float32)],
                                          coef[(v, np.float64)],
                                          decimal=4)
                assert_array_almost_equal(intercept[(v, np.float32)],
                                          intercept[(v, np.float64)],
                                          decimal=4)


def test_enet_l1_ratio():
    # Test that an error message is raised if an estimator that
    # uses _alpha_grid is called with l1_ratio=0
    msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
           "Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])

    assert_raise_message(ValueError, msg, ElasticNetCV(
        l1_ratio=0, random_state=42).fit, X, y)
    assert_raise_message(ValueError, msg, MultiTaskElasticNetCV(
        l1_ratio=0, random_state=42).fit, X, y[:, None])

    # Test that l1_ratio=0 is allowed if we supply a grid manually
    alphas = [0.1, 10]
    estkwds = {'alphas': alphas, 'random_state': 42}
    est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = ElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est_desired.fit(X, y)
        est.fit(X, y)
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)

    est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est.fit(X, y[:, None])
        est_desired.fit(X, y[:, None])
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)


def test_coef_shape_not_zero():
    est_no_intercept = Lasso(fit_intercept=False)
    est_no_intercept.fit(np.c_[np.ones(3)], np.ones(3))
    assert est_no_intercept.coef_.shape == (1,)


def test_warm_start_multitask_lasso():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    clf = MultiTaskLasso(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(clf.fit)(X, Y)
    ignore_warnings(clf.fit)(X, Y)  # do a second round with 5 iterations

    clf2 = MultiTaskLasso(alpha=0.1, max_iter=10)
    ignore_warnings(clf2.fit)(X, Y)
    assert_array_almost_equal(clf2.coef_, clf.coef_)


@pytest.mark.parametrize('klass, n_classes, kwargs',
                         [(Lasso, 1, dict(precompute=True)),
                          (Lasso, 1, dict(precompute=False)),
                          (MultiTaskLasso, 2, dict()),
                          (MultiTaskLasso, 2, dict())])
def test_enet_coordinate_descent(klass, n_classes, kwargs):
    """Test that a warning is issued if model does not converge"""
    clf = klass(max_iter=2, **kwargs)
    n_samples = 5
    n_features = 2
    X = np.ones((n_samples, n_features)) * 1e50
    y = np.ones((n_samples, n_classes))
    if klass == Lasso:
        y = y.ravel()
    assert_warns(ConvergenceWarning, clf.fit, X, y)


def test_convergence_warnings():
    random_state = np.random.RandomState(0)
    X = random_state.standard_normal((1000, 500))
    y = random_state.standard_normal((1000, 3))

    # check that the model fails to converge (a negative dual gap cannot occur)
    with pytest.warns(ConvergenceWarning):
        MultiTaskElasticNet(max_iter=1, tol=-1).fit(X, y)

    # check that the model converges w/o warnings
    with pytest.warns(None) as record:
        MultiTaskElasticNet(max_iter=1000).fit(X, y)

    assert not record.list


def test_sparse_input_convergence_warning():
    X, y, _, _ = build_dataset(n_samples=1000, n_features=500)

    with pytest.warns(ConvergenceWarning):
        ElasticNet(max_iter=1, tol=0).fit(
            sparse.csr_matrix(X, dtype=np.float32), y)

    # check that the model converges w/o warnings
    with pytest.warns(None) as record:
        Lasso(max_iter=1000).fit(sparse.csr_matrix(X, dtype=np.float32), y)

    assert not record.list


@pytest.mark.parametrize("precompute, inner_precompute", [
    (True, True),
    ('auto', False),
    (False, False),
])
def test_lassoCV_does_not_set_precompute(monkeypatch, precompute,
                                         inner_precompute):
    X, y, _, _ = build_dataset()
    calls = 0

    class LassoMock(Lasso):
        def fit(self, X, y):
            super().fit(X, y)
            nonlocal calls
            calls += 1
            assert self.precompute == inner_precompute

    monkeypatch.setattr("sklearn.linear_model._coordinate_descent.Lasso",
                        LassoMock)
    clf = LassoCV(precompute=precompute)
    clf.fit(X, y)
    assert calls > 0


def test_multi_task_lasso_cv_dtype():
    n_samples, n_features = 10, 3
    rng = np.random.RandomState(42)
    X = rng.binomial(1, .5, size=(n_samples, n_features))
    X = X.astype(int)  # make it explicit that X is int
    y = X[:, [0, 0]].copy()
    est = MultiTaskLassoCV(n_alphas=5, fit_intercept=True).fit(X, y)
    assert_array_almost_equal(est.coef_, [[1, 0, 0]] * 2, decimal=3)


@pytest.mark.parametrize('fit_intercept', [True, False])
@pytest.mark.parametrize('alpha', [0.01])
@pytest.mark.parametrize('normalize', [False, True])
@pytest.mark.parametrize('precompute', [False, True])
def test_enet_sample_weight_consistency(fit_intercept, alpha, normalize,
                                        precompute):
    """Test that the impact of sample_weight is consistent."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 5

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    params = dict(alpha=alpha, fit_intercept=fit_intercept,
                  precompute=precompute, tol=1e-6, l1_ratio=0.5)

    reg = ElasticNet(**params).fit(X, y)
    coef = reg.coef_.copy()
    if fit_intercept:
        intercept = reg.intercept_

    # sample_weight=np.ones(..) should be equivalent to sample_weight=None
    sample_weight = np.ones_like(y)
    reg.fit(X, y, sample_weight=sample_weight)
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # sample_weight=None should be equivalent to sample_weight = number
    sample_weight = 123.
    reg.fit(X, y, sample_weight=sample_weight)
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # scaling of sample_weight should have no effect, cf. np.average()
    sample_weight = 2 * np.ones_like(y)
    reg.fit(X, y, sample_weight=sample_weight)
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # setting one element of sample_weight to 0 is equivalent to removing
    # the corresponding sample
    sample_weight = np.ones_like(y)
    sample_weight[-1] = 0
    reg.fit(X, y, sample_weight=sample_weight)
    coef1 = reg.coef_.copy()
    if fit_intercept:
        intercept1 = reg.intercept_
    reg.fit(X[:-1], y[:-1])
    assert_allclose(reg.coef_, coef1, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept1)

    # check that multiplying sample_weight by 2 is equivalent
    # to repeating corresponding samples twice
    if sparse.issparse(X):
        X = X.toarray()

    X2 = np.concatenate([X, X[:n_samples//2]], axis=0)
    y2 = np.concatenate([y, y[:n_samples//2]])
    sample_weight_1 = np.ones(len(y))
    sample_weight_1[:n_samples//2] = 2

    reg1 = ElasticNet(**params).fit(
            X, y, sample_weight=sample_weight_1
    )

    reg2 = ElasticNet(**params).fit(
            X2, y2, sample_weight=None
    )
    assert_allclose(reg1.coef_, reg2.coef_)


def test_enet_sample_weight_sparse():
    reg = ElasticNet()
    X = sparse.csc_matrix(np.zeros((3, 2)))
    y = np.array([-1, 0, 1])
    sw = np.array([1, 2, 3])
    with pytest.raises(ValueError, match="Sample weights do not.*support "
                                         "sparse matrices"):
        reg.fit(X, y, sample_weight=sw, check_input=True)


@pytest.mark.parametrize("backend", ["loky", "threading"])
@pytest.mark.parametrize("estimator",
                         [ElasticNetCV, MultiTaskElasticNetCV,
                          LassoCV, MultiTaskLassoCV])
def test_linear_models_cv_fit_for_all_backends(backend, estimator):
    # LinearModelsCV.fit performs inplace operations on input data which is
    # memmapped when using loky backend, causing an error due to unexpected
    # behavior of fancy indexing of read-only memmaps (cf. numpy#14132).

    if (parse_version(joblib.__version__) < parse_version('0.12')
            and backend == 'loky'):
        pytest.skip('loky backend does not exist in joblib <0.12')

    # Create a problem sufficiently large to cause memmapping (1MB).
    n_targets = 1 + (estimator in (MultiTaskElasticNetCV, MultiTaskLassoCV))
    X, y = make_regression(20000, 10, n_targets=n_targets)

    with joblib.parallel_backend(backend=backend):
        estimator(n_jobs=2, cv=3).fit(X, y)