907 lines
30 KiB
Python
907 lines
30 KiB
Python
import warnings
|
|
|
|
import numpy as np
|
|
import pytest
|
|
from scipy import linalg
|
|
from sklearn.base import clone
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.pipeline import make_pipeline
|
|
from sklearn.preprocessing import StandardScaler
|
|
from sklearn.utils._testing import assert_allclose
|
|
from sklearn.utils._testing import assert_array_almost_equal
|
|
from sklearn.utils._testing import ignore_warnings
|
|
from sklearn.utils._testing import TempMemmap
|
|
from sklearn.utils import check_random_state
|
|
from sklearn.exceptions import ConvergenceWarning
|
|
from sklearn import linear_model, datasets
|
|
from sklearn.linear_model._least_angle import _lars_path_residues
|
|
from sklearn.linear_model import LassoLarsIC, lars_path
|
|
from sklearn.linear_model import Lars, LassoLars, LarsCV, LassoLarsCV
|
|
|
|
# TODO: use another dataset that has multiple drops
|
|
diabetes = datasets.load_diabetes()
|
|
X, y = diabetes.data, diabetes.target
|
|
G = np.dot(X.T, X)
|
|
Xy = np.dot(X.T, y)
|
|
n_samples = y.size
|
|
|
|
# TODO(1.4): 'normalize' to be removed
|
|
filterwarnings_normalize = pytest.mark.filterwarnings(
|
|
"ignore:'normalize' was deprecated"
|
|
)
|
|
|
|
|
|
# TODO(1.4) 'normalize' to be removed
|
|
@pytest.mark.parametrize(
|
|
"LeastAngleModel", [Lars, LassoLars, LarsCV, LassoLarsCV, LassoLarsIC]
|
|
)
|
|
@pytest.mark.parametrize(
|
|
"normalize, n_warnings", [(True, 1), (False, 1), ("deprecated", 0)]
|
|
)
|
|
def test_assure_warning_when_normalize(LeastAngleModel, normalize, n_warnings):
|
|
# check that we issue a FutureWarning when normalize was set
|
|
rng = check_random_state(0)
|
|
n_samples = 200
|
|
n_features = 2
|
|
X = rng.randn(n_samples, n_features)
|
|
X[X < 0.1] = 0.0
|
|
y = rng.rand(n_samples)
|
|
|
|
model = LeastAngleModel(normalize=normalize)
|
|
with warnings.catch_warnings(record=True) as rec:
|
|
warnings.simplefilter("always", FutureWarning)
|
|
model.fit(X, y)
|
|
|
|
assert len([w.message for w in rec]) == n_warnings
|
|
|
|
|
|
def test_simple():
|
|
# Principle of Lars is to keep covariances tied and decreasing
|
|
|
|
# also test verbose output
|
|
from io import StringIO
|
|
import sys
|
|
|
|
old_stdout = sys.stdout
|
|
try:
|
|
sys.stdout = StringIO()
|
|
|
|
_, _, coef_path_ = linear_model.lars_path(X, y, method="lar", verbose=10)
|
|
|
|
sys.stdout = old_stdout
|
|
|
|
for i, coef_ in enumerate(coef_path_.T):
|
|
res = y - np.dot(X, coef_)
|
|
cov = np.dot(X.T, res)
|
|
C = np.max(abs(cov))
|
|
eps = 1e-3
|
|
ocur = len(cov[C - eps < abs(cov)])
|
|
if i < X.shape[1]:
|
|
assert ocur == i + 1
|
|
else:
|
|
# no more than max_pred variables can go into the active set
|
|
assert ocur == X.shape[1]
|
|
finally:
|
|
sys.stdout = old_stdout
|
|
|
|
|
|
def test_simple_precomputed():
|
|
# The same, with precomputed Gram matrix
|
|
|
|
_, _, coef_path_ = linear_model.lars_path(X, y, Gram=G, method="lar")
|
|
|
|
for i, coef_ in enumerate(coef_path_.T):
|
|
res = y - np.dot(X, coef_)
|
|
cov = np.dot(X.T, res)
|
|
C = np.max(abs(cov))
|
|
eps = 1e-3
|
|
ocur = len(cov[C - eps < abs(cov)])
|
|
if i < X.shape[1]:
|
|
assert ocur == i + 1
|
|
else:
|
|
# no more than max_pred variables can go into the active set
|
|
assert ocur == X.shape[1]
|
|
|
|
|
|
def _assert_same_lars_path_result(output1, output2):
|
|
assert len(output1) == len(output2)
|
|
for o1, o2 in zip(output1, output2):
|
|
assert_allclose(o1, o2)
|
|
|
|
|
|
@pytest.mark.parametrize("method", ["lar", "lasso"])
|
|
@pytest.mark.parametrize("return_path", [True, False])
|
|
def test_lars_path_gram_equivalent(method, return_path):
|
|
_assert_same_lars_path_result(
|
|
linear_model.lars_path_gram(
|
|
Xy=Xy, Gram=G, n_samples=n_samples, method=method, return_path=return_path
|
|
),
|
|
linear_model.lars_path(X, y, Gram=G, method=method, return_path=return_path),
|
|
)
|
|
|
|
|
|
def test_x_none_gram_none_raises_value_error():
|
|
# Test that lars_path with no X and Gram raises exception
|
|
Xy = np.dot(X.T, y)
|
|
with pytest.raises(ValueError):
|
|
linear_model.lars_path(None, y, Gram=None, Xy=Xy)
|
|
|
|
|
|
def test_all_precomputed():
|
|
# Test that lars_path with precomputed Gram and Xy gives the right answer
|
|
G = np.dot(X.T, X)
|
|
Xy = np.dot(X.T, y)
|
|
for method in "lar", "lasso":
|
|
output = linear_model.lars_path(X, y, method=method)
|
|
output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy, method=method)
|
|
for expected, got in zip(output, output_pre):
|
|
assert_array_almost_equal(expected, got)
|
|
|
|
|
|
# TODO(1.4): 'normalize' to be removed
|
|
@filterwarnings_normalize
|
|
@pytest.mark.filterwarnings("ignore: `rcond` parameter will change")
|
|
# numpy deprecation
|
|
def test_lars_lstsq():
|
|
# Test that Lars gives least square solution at the end
|
|
# of the path
|
|
X1 = 3 * X # use un-normalized dataset
|
|
clf = linear_model.LassoLars(alpha=0.0)
|
|
clf.fit(X1, y)
|
|
coef_lstsq = np.linalg.lstsq(X1, y, rcond=None)[0]
|
|
assert_array_almost_equal(clf.coef_, coef_lstsq)
|
|
|
|
|
|
@pytest.mark.filterwarnings("ignore:`rcond` parameter will change")
|
|
# numpy deprecation
|
|
def test_lasso_gives_lstsq_solution():
|
|
# Test that Lars Lasso gives least square solution at the end
|
|
# of the path
|
|
_, _, coef_path_ = linear_model.lars_path(X, y, method="lasso")
|
|
coef_lstsq = np.linalg.lstsq(X, y)[0]
|
|
assert_array_almost_equal(coef_lstsq, coef_path_[:, -1])
|
|
|
|
|
|
def test_collinearity():
|
|
# Check that lars_path is robust to collinearity in input
|
|
X = np.array([[3.0, 3.0, 1.0], [2.0, 2.0, 0.0], [1.0, 1.0, 0]])
|
|
y = np.array([1.0, 0.0, 0])
|
|
rng = np.random.RandomState(0)
|
|
|
|
f = ignore_warnings
|
|
_, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01)
|
|
assert not np.isnan(coef_path_).any()
|
|
residual = np.dot(X, coef_path_[:, -1]) - y
|
|
assert (residual**2).sum() < 1.0 # just make sure it's bounded
|
|
|
|
n_samples = 10
|
|
X = rng.rand(n_samples, 5)
|
|
y = np.zeros(n_samples)
|
|
_, _, coef_path_ = linear_model.lars_path(
|
|
X,
|
|
y,
|
|
Gram="auto",
|
|
copy_X=False,
|
|
copy_Gram=False,
|
|
alpha_min=0.0,
|
|
method="lasso",
|
|
verbose=0,
|
|
max_iter=500,
|
|
)
|
|
assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
|
|
|
|
|
|
def test_no_path():
|
|
# Test that the ``return_path=False`` option returns the correct output
|
|
alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar")
|
|
alpha_, _, coef = linear_model.lars_path(X, y, method="lar", return_path=False)
|
|
|
|
assert_array_almost_equal(coef, coef_path_[:, -1])
|
|
assert alpha_ == alphas_[-1]
|
|
|
|
|
|
def test_no_path_precomputed():
|
|
# Test that the ``return_path=False`` option with Gram remains correct
|
|
alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar", Gram=G)
|
|
alpha_, _, coef = linear_model.lars_path(
|
|
X, y, method="lar", Gram=G, return_path=False
|
|
)
|
|
|
|
assert_array_almost_equal(coef, coef_path_[:, -1])
|
|
assert alpha_ == alphas_[-1]
|
|
|
|
|
|
def test_no_path_all_precomputed():
|
|
# Test that the ``return_path=False`` option with Gram and Xy remains
|
|
# correct
|
|
X, y = 3 * diabetes.data, diabetes.target
|
|
G = np.dot(X.T, X)
|
|
Xy = np.dot(X.T, y)
|
|
alphas_, _, coef_path_ = linear_model.lars_path(
|
|
X, y, method="lasso", Xy=Xy, Gram=G, alpha_min=0.9
|
|
)
|
|
alpha_, _, coef = linear_model.lars_path(
|
|
X, y, method="lasso", Gram=G, Xy=Xy, alpha_min=0.9, return_path=False
|
|
)
|
|
|
|
assert_array_almost_equal(coef, coef_path_[:, -1])
|
|
assert alpha_ == alphas_[-1]
|
|
|
|
|
|
@filterwarnings_normalize
|
|
@pytest.mark.parametrize(
|
|
"classifier", [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]
|
|
)
|
|
def test_lars_precompute(classifier):
|
|
# Check for different values of precompute
|
|
G = np.dot(X.T, X)
|
|
|
|
clf = classifier(precompute=G)
|
|
output_1 = ignore_warnings(clf.fit)(X, y).coef_
|
|
for precompute in [True, False, "auto", None]:
|
|
clf = classifier(precompute=precompute)
|
|
output_2 = clf.fit(X, y).coef_
|
|
assert_array_almost_equal(output_1, output_2, decimal=8)
|
|
|
|
|
|
def test_singular_matrix():
|
|
# Test when input is a singular matrix
|
|
X1 = np.array([[1, 1.0], [1.0, 1.0]])
|
|
y1 = np.array([1, 1])
|
|
_, _, coef_path = linear_model.lars_path(X1, y1)
|
|
assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]])
|
|
|
|
|
|
def test_rank_deficient_design():
|
|
# consistency test that checks that LARS Lasso is handling rank
|
|
# deficient input data (with n_features < rank) in the same way
|
|
# as coordinate descent Lasso
|
|
y = [5, 0, 5]
|
|
for X in ([[5, 0], [0, 5], [10, 10]], [[10, 10, 0], [1e-32, 0, 0], [0, 0, 1]]):
|
|
# To be able to use the coefs to compute the objective function,
|
|
# we need to turn off normalization
|
|
lars = linear_model.LassoLars(0.1)
|
|
coef_lars_ = lars.fit(X, y).coef_
|
|
obj_lars = 1.0 / (2.0 * 3.0) * linalg.norm(
|
|
y - np.dot(X, coef_lars_)
|
|
) ** 2 + 0.1 * linalg.norm(coef_lars_, 1)
|
|
coord_descent = linear_model.Lasso(0.1, tol=1e-6)
|
|
coef_cd_ = coord_descent.fit(X, y).coef_
|
|
obj_cd = (1.0 / (2.0 * 3.0)) * linalg.norm(
|
|
y - np.dot(X, coef_cd_)
|
|
) ** 2 + 0.1 * linalg.norm(coef_cd_, 1)
|
|
assert obj_lars < obj_cd * (1.0 + 1e-8)
|
|
|
|
|
|
def test_lasso_lars_vs_lasso_cd():
|
|
# Test that LassoLars and Lasso using coordinate descent give the
|
|
# same results.
|
|
X = 3 * diabetes.data
|
|
|
|
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso")
|
|
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
|
for c, a in zip(lasso_path.T, alphas):
|
|
if a == 0:
|
|
continue
|
|
lasso_cd.alpha = a
|
|
lasso_cd.fit(X, y)
|
|
error = linalg.norm(c - lasso_cd.coef_)
|
|
assert error < 0.01
|
|
|
|
# similar test, with the classifiers
|
|
for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
|
|
clf1 = linear_model.LassoLars(alpha=alpha).fit(X, y)
|
|
clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8).fit(X, y)
|
|
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
|
assert err < 1e-3
|
|
|
|
# same test, with normalized data
|
|
X = diabetes.data
|
|
X = X - X.sum(axis=0)
|
|
X /= np.linalg.norm(X, axis=0)
|
|
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso")
|
|
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
|
for c, a in zip(lasso_path.T, alphas):
|
|
if a == 0:
|
|
continue
|
|
lasso_cd.alpha = a
|
|
lasso_cd.fit(X, y)
|
|
error = linalg.norm(c - lasso_cd.coef_)
|
|
assert error < 0.01
|
|
|
|
|
|
@filterwarnings_normalize
|
|
def test_lasso_lars_vs_lasso_cd_early_stopping():
|
|
# Test that LassoLars and Lasso using coordinate descent give the
|
|
# same results when early stopping is used.
|
|
# (test : before, in the middle, and in the last part of the path)
|
|
alphas_min = [10, 0.9, 1e-4]
|
|
|
|
X = diabetes.data
|
|
|
|
for alpha_min in alphas_min:
|
|
alphas, _, lasso_path = linear_model.lars_path(
|
|
X, y, method="lasso", alpha_min=alpha_min
|
|
)
|
|
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
|
lasso_cd.alpha = alphas[-1]
|
|
lasso_cd.fit(X, y)
|
|
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
|
assert error < 0.01
|
|
|
|
# same test, with normalization
|
|
X = diabetes.data - diabetes.data.sum(axis=0)
|
|
X /= np.linalg.norm(X, axis=0)
|
|
|
|
for alpha_min in alphas_min:
|
|
alphas, _, lasso_path = linear_model.lars_path(
|
|
X, y, method="lasso", alpha_min=alpha_min
|
|
)
|
|
lasso_cd = linear_model.Lasso(tol=1e-8)
|
|
lasso_cd.alpha = alphas[-1]
|
|
lasso_cd.fit(X, y)
|
|
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
|
assert error < 0.01
|
|
|
|
|
|
@filterwarnings_normalize
|
|
def test_lasso_lars_path_length():
|
|
# Test that the path length of the LassoLars is right
|
|
lasso = linear_model.LassoLars()
|
|
lasso.fit(X, y)
|
|
lasso2 = linear_model.LassoLars(alpha=lasso.alphas_[2])
|
|
lasso2.fit(X, y)
|
|
assert_array_almost_equal(lasso.alphas_[:3], lasso2.alphas_)
|
|
# Also check that the sequence of alphas is always decreasing
|
|
assert np.all(np.diff(lasso.alphas_) < 0)
|
|
|
|
|
|
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
|
|
# Test lasso lars on a very ill-conditioned design, and check that
|
|
# it does not blow up, and stays somewhat close to a solution given
|
|
# by the coordinate descent solver
|
|
# Also test that lasso_path (using lars_path output style) gives
|
|
# the same result as lars_path and previous lasso output style
|
|
# under these conditions.
|
|
rng = np.random.RandomState(42)
|
|
|
|
# Generate data
|
|
n, m = 70, 100
|
|
k = 5
|
|
X = rng.randn(n, m)
|
|
w = np.zeros((m, 1))
|
|
i = np.arange(0, m)
|
|
rng.shuffle(i)
|
|
supp = i[:k]
|
|
w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
|
|
y = np.dot(X, w)
|
|
sigma = 0.2
|
|
y += sigma * rng.rand(*y.shape)
|
|
y = y.squeeze()
|
|
lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method="lasso")
|
|
|
|
_, lasso_coef2, _ = linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6)
|
|
|
|
assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
|
|
|
|
|
|
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
|
|
# Create an ill-conditioned situation in which the LARS has to go
|
|
# far in the path to converge, and check that LARS and coordinate
|
|
# descent give the same answers
|
|
# Note it used to be the case that Lars had to use the drop for good
|
|
# strategy for this but this is no longer the case with the
|
|
# equality_tolerance checks
|
|
X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]]
|
|
y = [10, 10, 1]
|
|
alpha = 0.0001
|
|
|
|
def objective_function(coef):
|
|
return 1.0 / (2.0 * len(X)) * linalg.norm(
|
|
y - np.dot(X, coef)
|
|
) ** 2 + alpha * linalg.norm(coef, 1)
|
|
|
|
lars = linear_model.LassoLars(alpha=alpha)
|
|
warning_message = "Regressors in active set degenerate."
|
|
with pytest.warns(ConvergenceWarning, match=warning_message):
|
|
lars.fit(X, y)
|
|
lars_coef_ = lars.coef_
|
|
lars_obj = objective_function(lars_coef_)
|
|
|
|
coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4)
|
|
cd_coef_ = coord_descent.fit(X, y).coef_
|
|
cd_obj = objective_function(cd_coef_)
|
|
|
|
assert lars_obj < cd_obj * (1.0 + 1e-8)
|
|
|
|
|
|
@filterwarnings_normalize
|
|
def test_lars_add_features():
|
|
# assure that at least some features get added if necessary
|
|
# test for 6d2b4c
|
|
# Hilbert matrix
|
|
n = 5
|
|
H = 1.0 / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis])
|
|
clf = linear_model.Lars(fit_intercept=False).fit(H, np.arange(n))
|
|
assert np.all(np.isfinite(clf.coef_))
|
|
|
|
|
|
@filterwarnings_normalize
|
|
def test_lars_n_nonzero_coefs(verbose=False):
|
|
lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
|
|
lars.fit(X, y)
|
|
assert len(lars.coef_.nonzero()[0]) == 6
|
|
# The path should be of length 6 + 1 in a Lars going down to 6
|
|
# non-zero coefs
|
|
assert len(lars.alphas_) == 7
|
|
|
|
|
|
@filterwarnings_normalize
|
|
@ignore_warnings
|
|
def test_multitarget():
|
|
# Assure that estimators receiving multidimensional y do the right thing
|
|
Y = np.vstack([y, y**2]).T
|
|
n_targets = Y.shape[1]
|
|
estimators = [
|
|
linear_model.LassoLars(),
|
|
linear_model.Lars(),
|
|
# regression test for gh-1615
|
|
linear_model.LassoLars(fit_intercept=False),
|
|
linear_model.Lars(fit_intercept=False),
|
|
]
|
|
|
|
for estimator in estimators:
|
|
estimator.fit(X, Y)
|
|
Y_pred = estimator.predict(X)
|
|
alphas, active, coef, path = (
|
|
estimator.alphas_,
|
|
estimator.active_,
|
|
estimator.coef_,
|
|
estimator.coef_path_,
|
|
)
|
|
for k in range(n_targets):
|
|
estimator.fit(X, Y[:, k])
|
|
y_pred = estimator.predict(X)
|
|
assert_array_almost_equal(alphas[k], estimator.alphas_)
|
|
assert_array_almost_equal(active[k], estimator.active_)
|
|
assert_array_almost_equal(coef[k], estimator.coef_)
|
|
assert_array_almost_equal(path[k], estimator.coef_path_)
|
|
assert_array_almost_equal(Y_pred[:, k], y_pred)
|
|
|
|
|
|
@filterwarnings_normalize
|
|
def test_lars_cv():
|
|
# Test the LassoLarsCV object by checking that the optimal alpha
|
|
# increases as the number of samples increases.
|
|
# This property is not actually guaranteed in general and is just a
|
|
# property of the given dataset, with the given steps chosen.
|
|
old_alpha = 0
|
|
lars_cv = linear_model.LassoLarsCV()
|
|
for length in (400, 200, 100):
|
|
X = diabetes.data[:length]
|
|
y = diabetes.target[:length]
|
|
lars_cv.fit(X, y)
|
|
np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
|
|
old_alpha = lars_cv.alpha_
|
|
assert not hasattr(lars_cv, "n_nonzero_coefs")
|
|
|
|
|
|
def test_lars_cv_max_iter(recwarn):
|
|
warnings.simplefilter("always")
|
|
with np.errstate(divide="raise", invalid="raise"):
|
|
X = diabetes.data
|
|
y = diabetes.target
|
|
rng = np.random.RandomState(42)
|
|
x = rng.randn(len(y))
|
|
X = diabetes.data
|
|
X = np.c_[X, x, x] # add correlated features
|
|
X = StandardScaler().fit_transform(X)
|
|
lars_cv = linear_model.LassoLarsCV(max_iter=5, cv=5)
|
|
lars_cv.fit(X, y)
|
|
|
|
# Check that there is no warning in general and no ConvergenceWarning
|
|
# in particular.
|
|
# Materialize the string representation of the warning to get a more
|
|
# informative error message in case of AssertionError.
|
|
recorded_warnings = [str(w) for w in recwarn]
|
|
assert len(recorded_warnings) == 0
|
|
|
|
|
|
def test_lasso_lars_ic():
|
|
# Test the LassoLarsIC object by checking that
|
|
# - some good features are selected.
|
|
# - alpha_bic > alpha_aic
|
|
# - n_nonzero_bic < n_nonzero_aic
|
|
lars_bic = linear_model.LassoLarsIC("bic")
|
|
lars_aic = linear_model.LassoLarsIC("aic")
|
|
rng = np.random.RandomState(42)
|
|
X = diabetes.data
|
|
X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features
|
|
X = StandardScaler().fit_transform(X)
|
|
lars_bic.fit(X, y)
|
|
lars_aic.fit(X, y)
|
|
nonzero_bic = np.where(lars_bic.coef_)[0]
|
|
nonzero_aic = np.where(lars_aic.coef_)[0]
|
|
assert lars_bic.alpha_ > lars_aic.alpha_
|
|
assert len(nonzero_bic) < len(nonzero_aic)
|
|
assert np.max(nonzero_bic) < diabetes.data.shape[1]
|
|
|
|
|
|
def test_lars_path_readonly_data():
|
|
# When using automated memory mapping on large input, the
|
|
# fold data is in read-only mode
|
|
# This is a non-regression test for:
|
|
# https://github.com/scikit-learn/scikit-learn/issues/4597
|
|
splitted_data = train_test_split(X, y, random_state=42)
|
|
with TempMemmap(splitted_data) as (X_train, X_test, y_train, y_test):
|
|
# The following should not fail despite copy=False
|
|
_lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
|
|
|
|
|
|
def test_lars_path_positive_constraint():
|
|
# this is the main test for the positive parameter on the lars_path method
|
|
# the estimator classes just make use of this function
|
|
|
|
# we do the test on the diabetes dataset
|
|
|
|
# ensure that we get negative coefficients when positive=False
|
|
# and all positive when positive=True
|
|
# for method 'lar' (default) and lasso
|
|
|
|
err_msg = "Positive constraint not supported for 'lar' coding method."
|
|
with pytest.raises(ValueError, match=err_msg):
|
|
linear_model.lars_path(
|
|
diabetes["data"], diabetes["target"], method="lar", positive=True
|
|
)
|
|
|
|
method = "lasso"
|
|
_, _, coefs = linear_model.lars_path(
|
|
X, y, return_path=True, method=method, positive=False
|
|
)
|
|
assert coefs.min() < 0
|
|
|
|
_, _, coefs = linear_model.lars_path(
|
|
X, y, return_path=True, method=method, positive=True
|
|
)
|
|
assert coefs.min() >= 0
|
|
|
|
|
|
# now we gonna test the positive option for all estimator classes
|
|
|
|
default_parameter = {"fit_intercept": False}
|
|
|
|
estimator_parameter_map = {
|
|
"LassoLars": {"alpha": 0.1},
|
|
"LassoLarsCV": {},
|
|
"LassoLarsIC": {},
|
|
}
|
|
|
|
|
|
@filterwarnings_normalize
|
|
def test_estimatorclasses_positive_constraint():
|
|
# testing the transmissibility for the positive option of all estimator
|
|
# classes in this same function here
|
|
default_parameter = {"fit_intercept": False}
|
|
|
|
estimator_parameter_map = {
|
|
"LassoLars": {"alpha": 0.1},
|
|
"LassoLarsCV": {},
|
|
"LassoLarsIC": {},
|
|
}
|
|
for estname in estimator_parameter_map:
|
|
params = default_parameter.copy()
|
|
params.update(estimator_parameter_map[estname])
|
|
estimator = getattr(linear_model, estname)(positive=False, **params)
|
|
estimator.fit(X, y)
|
|
assert estimator.coef_.min() < 0
|
|
estimator = getattr(linear_model, estname)(positive=True, **params)
|
|
estimator.fit(X, y)
|
|
assert min(estimator.coef_) >= 0
|
|
|
|
|
|
def test_lasso_lars_vs_lasso_cd_positive():
|
|
# Test that LassoLars and Lasso using coordinate descent give the
|
|
# same results when using the positive option
|
|
|
|
# This test is basically a copy of the above with additional positive
|
|
# option. However for the middle part, the comparison of coefficient values
|
|
# for a range of alphas, we had to make an adaptations. See below.
|
|
|
|
# not normalized data
|
|
X = 3 * diabetes.data
|
|
|
|
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
|
|
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
|
|
for c, a in zip(lasso_path.T, alphas):
|
|
if a == 0:
|
|
continue
|
|
lasso_cd.alpha = a
|
|
lasso_cd.fit(X, y)
|
|
error = linalg.norm(c - lasso_cd.coef_)
|
|
assert error < 0.01
|
|
|
|
# The range of alphas chosen for coefficient comparison here is restricted
|
|
# as compared with the above test without the positive option. This is due
|
|
# to the circumstance that the Lars-Lasso algorithm does not converge to
|
|
# the least-squares-solution for small alphas, see 'Least Angle Regression'
|
|
# by Efron et al 2004. The coefficients are typically in congruence up to
|
|
# the smallest alpha reached by the Lars-Lasso algorithm and start to
|
|
# diverge thereafter. See
|
|
# https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
|
|
|
|
for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
|
|
clf1 = linear_model.LassoLars(
|
|
fit_intercept=False, alpha=alpha, positive=True
|
|
).fit(X, y)
|
|
clf2 = linear_model.Lasso(
|
|
fit_intercept=False, alpha=alpha, tol=1e-8, positive=True
|
|
).fit(X, y)
|
|
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
|
assert err < 1e-3
|
|
|
|
# normalized data
|
|
X = diabetes.data - diabetes.data.sum(axis=0)
|
|
X /= np.linalg.norm(X, axis=0)
|
|
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
|
|
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
|
|
for c, a in zip(lasso_path.T[:-1], alphas[:-1]): # don't include alpha=0
|
|
lasso_cd.alpha = a
|
|
lasso_cd.fit(X, y)
|
|
error = linalg.norm(c - lasso_cd.coef_)
|
|
assert error < 0.01
|
|
|
|
|
|
def test_lasso_lars_vs_R_implementation():
|
|
# Test that sklearn LassoLars implementation agrees with the LassoLars
|
|
# implementation available in R (lars library) when fit_intercept=False.
|
|
|
|
# Let's generate the data used in the bug report 7778
|
|
y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822, -19.42109366])
|
|
x = np.array(
|
|
[
|
|
[0.47299829, 0, 0, 0, 0],
|
|
[0.08239882, 0.85784863, 0, 0, 0],
|
|
[0.30114139, -0.07501577, 0.80895216, 0, 0],
|
|
[-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
|
|
[-0.69363927, 0.06754067, 0.18064514, -0.0803561, 0.40427291],
|
|
]
|
|
)
|
|
|
|
X = x.T
|
|
|
|
# The R result was obtained using the following code:
|
|
#
|
|
# library(lars)
|
|
# model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
|
|
# trace=TRUE, normalize=FALSE)
|
|
# r = t(model_lasso_lars$beta)
|
|
#
|
|
|
|
r = np.array(
|
|
[
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
-79.810362809499026,
|
|
-83.528788732782829,
|
|
-83.777653739190711,
|
|
-83.784156932888934,
|
|
-84.033390591756657,
|
|
],
|
|
[0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0, 0.025219751009936],
|
|
[
|
|
0,
|
|
-3.577397088285891,
|
|
-4.702795355871871,
|
|
-7.016748621359461,
|
|
-7.614898471899412,
|
|
-0.336938391359179,
|
|
0,
|
|
0,
|
|
0.001213370600853,
|
|
0.048162321585148,
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
2.231558436628169,
|
|
2.723267514525966,
|
|
2.811549786389614,
|
|
2.813766976061531,
|
|
2.817462468949557,
|
|
2.817368178703816,
|
|
2.816221090636795,
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
-1.218422599914637,
|
|
-3.457726183014808,
|
|
-4.021304522060710,
|
|
-45.827461592423745,
|
|
-47.776608869312305,
|
|
-47.911561610746404,
|
|
-47.914845922736234,
|
|
-48.039562334265717,
|
|
],
|
|
]
|
|
)
|
|
|
|
model_lasso_lars = linear_model.LassoLars(alpha=0, fit_intercept=False)
|
|
model_lasso_lars.fit(X, y)
|
|
skl_betas = model_lasso_lars.coef_path_
|
|
|
|
assert_array_almost_equal(r, skl_betas, decimal=12)
|
|
|
|
|
|
@filterwarnings_normalize
|
|
@pytest.mark.parametrize("copy_X", [True, False])
|
|
def test_lasso_lars_copyX_behaviour(copy_X):
|
|
"""
|
|
Test that user input regarding copy_X is not being overridden (it was until
|
|
at least version 0.21)
|
|
|
|
"""
|
|
lasso_lars = LassoLarsIC(copy_X=copy_X, precompute=False)
|
|
rng = np.random.RandomState(0)
|
|
X = rng.normal(0, 1, (100, 5))
|
|
X_copy = X.copy()
|
|
y = X[:, 2]
|
|
lasso_lars.fit(X, y)
|
|
assert copy_X == np.array_equal(X, X_copy)
|
|
|
|
|
|
@filterwarnings_normalize
|
|
@pytest.mark.parametrize("copy_X", [True, False])
|
|
def test_lasso_lars_fit_copyX_behaviour(copy_X):
|
|
"""
|
|
Test that user input to .fit for copy_X overrides default __init__ value
|
|
|
|
"""
|
|
lasso_lars = LassoLarsIC(precompute=False)
|
|
rng = np.random.RandomState(0)
|
|
X = rng.normal(0, 1, (100, 5))
|
|
X_copy = X.copy()
|
|
y = X[:, 2]
|
|
lasso_lars.fit(X, y, copy_X=copy_X)
|
|
assert copy_X == np.array_equal(X, X_copy)
|
|
|
|
|
|
@filterwarnings_normalize
|
|
@pytest.mark.parametrize("est", (LassoLars(alpha=1e-3), Lars()))
|
|
def test_lars_with_jitter(est):
|
|
# Test that a small amount of jitter helps stability,
|
|
# using example provided in issue #2746
|
|
|
|
X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0], [0.0, -1.0, 0.0, 0.0, 0.0]])
|
|
y = [-2.5, -2.5]
|
|
expected_coef = [0, 2.5, 0, 2.5, 0]
|
|
|
|
# set to fit_intercept to False since target is constant and we want check
|
|
# the value of coef. coef would be all zeros otherwise.
|
|
est.set_params(fit_intercept=False)
|
|
est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)
|
|
|
|
est.fit(X, y)
|
|
est_jitter.fit(X, y)
|
|
|
|
assert np.mean((est.coef_ - est_jitter.coef_) ** 2) > 0.1
|
|
np.testing.assert_allclose(est_jitter.coef_, expected_coef, rtol=1e-3)
|
|
|
|
|
|
def test_X_none_gram_not_none():
|
|
with pytest.raises(ValueError, match="X cannot be None if Gram is not None"):
|
|
lars_path(X=None, y=[1], Gram="not None")
|
|
|
|
|
|
def test_copy_X_with_auto_gram():
|
|
# Non-regression test for #17789, `copy_X=True` and Gram='auto' does not
|
|
# overwrite X
|
|
rng = np.random.RandomState(42)
|
|
X = rng.rand(6, 6)
|
|
y = rng.rand(6)
|
|
|
|
X_before = X.copy()
|
|
linear_model.lars_path(X, y, Gram="auto", copy_X=True, method="lasso")
|
|
# X did not change
|
|
assert_allclose(X, X_before)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"LARS, has_coef_path, args",
|
|
(
|
|
(Lars, True, {}),
|
|
(LassoLars, True, {}),
|
|
(LassoLarsIC, False, {}),
|
|
(LarsCV, True, {}),
|
|
# max_iter=5 is for avoiding ConvergenceWarning
|
|
(LassoLarsCV, True, {"max_iter": 5}),
|
|
),
|
|
)
|
|
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
|
|
@filterwarnings_normalize
|
|
def test_lars_dtype_match(LARS, has_coef_path, args, dtype):
|
|
# The test ensures that the fit method preserves input dtype
|
|
rng = np.random.RandomState(0)
|
|
X = rng.rand(20, 6).astype(dtype)
|
|
y = rng.rand(20).astype(dtype)
|
|
|
|
model = LARS(**args)
|
|
model.fit(X, y)
|
|
assert model.coef_.dtype == dtype
|
|
if has_coef_path:
|
|
assert model.coef_path_.dtype == dtype
|
|
assert model.intercept_.dtype == dtype
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"LARS, has_coef_path, args",
|
|
(
|
|
(Lars, True, {}),
|
|
(LassoLars, True, {}),
|
|
(LassoLarsIC, False, {}),
|
|
(LarsCV, True, {}),
|
|
# max_iter=5 is for avoiding ConvergenceWarning
|
|
(LassoLarsCV, True, {"max_iter": 5}),
|
|
),
|
|
)
|
|
@filterwarnings_normalize
|
|
def test_lars_numeric_consistency(LARS, has_coef_path, args):
|
|
# The test ensures numerical consistency between trained coefficients
|
|
# of float32 and float64.
|
|
rtol = 1e-5
|
|
atol = 1e-5
|
|
|
|
rng = np.random.RandomState(0)
|
|
X_64 = rng.rand(10, 6)
|
|
y_64 = rng.rand(10)
|
|
|
|
model_64 = LARS(**args).fit(X_64, y_64)
|
|
model_32 = LARS(**args).fit(X_64.astype(np.float32), y_64.astype(np.float32))
|
|
|
|
assert_allclose(model_64.coef_, model_32.coef_, rtol=rtol, atol=atol)
|
|
if has_coef_path:
|
|
assert_allclose(model_64.coef_path_, model_32.coef_path_, rtol=rtol, atol=atol)
|
|
assert_allclose(model_64.intercept_, model_32.intercept_, rtol=rtol, atol=atol)
|
|
|
|
|
|
@pytest.mark.parametrize("criterion", ["aic", "bic"])
|
|
def test_lassolarsic_alpha_selection(criterion):
|
|
"""Check that we properly compute the AIC and BIC score.
|
|
|
|
In this test, we reproduce the example of the Fig. 2 of Zou et al.
|
|
(reference [1] in LassoLarsIC) In this example, only 7 features should be
|
|
selected.
|
|
"""
|
|
model = make_pipeline(StandardScaler(), LassoLarsIC(criterion=criterion))
|
|
model.fit(X, y)
|
|
|
|
best_alpha_selected = np.argmin(model[-1].criterion_)
|
|
assert best_alpha_selected == 7
|
|
|
|
|
|
@pytest.mark.parametrize("fit_intercept", [True, False])
|
|
def test_lassolarsic_noise_variance(fit_intercept):
|
|
"""Check the behaviour when `n_samples` < `n_features` and that one needs
|
|
to provide the noise variance."""
|
|
rng = np.random.RandomState(0)
|
|
X, y = datasets.make_regression(
|
|
n_samples=10, n_features=11 - fit_intercept, random_state=rng
|
|
)
|
|
|
|
model = make_pipeline(StandardScaler(), LassoLarsIC(fit_intercept=fit_intercept))
|
|
|
|
err_msg = (
|
|
"You are using LassoLarsIC in the case where the number of samples is smaller"
|
|
" than the number of features"
|
|
)
|
|
with pytest.raises(ValueError, match=err_msg):
|
|
model.fit(X, y)
|
|
|
|
model.set_params(lassolarsic__noise_variance=1.0)
|
|
model.fit(X, y).predict(X)
|