from itertools import product import numpy as np from numpy.testing import assert_almost_equal, assert_array_almost_equal from scipy import linalg import pytest from sklearn import neighbors, manifold from sklearn.manifold._locally_linear import barycenter_kneighbors_graph from sklearn.utils._testing import ignore_warnings from sklearn.utils._testing import assert_raise_message eigen_solvers = ['dense', 'arpack'] # ---------------------------------------------------------------------- # Test utility routines def test_barycenter_kneighbors_graph(): X = np.array([[0, 1], [1.01, 1.], [2, 0]]) A = barycenter_kneighbors_graph(X, 1) assert_array_almost_equal( A.toarray(), [[0., 1., 0.], [1., 0., 0.], [0., 1., 0.]]) A = barycenter_kneighbors_graph(X, 2) # check that columns sum to one assert_array_almost_equal(np.sum(A.toarray(), 1), np.ones(3)) pred = np.dot(A.toarray(), X) assert linalg.norm(pred - X) / X.shape[0] < 1 # ---------------------------------------------------------------------- # Test LLE by computing the reconstruction error on some manifolds. def test_lle_simple_grid(): # note: ARPACK is numerically unstable, so this test will fail for # some random seeds. We choose 2 because the tests pass. rng = np.random.RandomState(2) # grid of equidistant points in 2D, n_components = n_dim X = np.array(list(product(range(5), repeat=2))) X = X + 1e-10 * rng.uniform(size=X.shape) n_components = 2 clf = manifold.LocallyLinearEmbedding(n_neighbors=5, n_components=n_components, random_state=rng) tol = 0.1 N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X, 'fro') assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) clf.fit(X) assert clf.embedding_.shape[1] == n_components reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 assert reconstruction_error < tol assert_almost_equal(clf.reconstruction_error_, reconstruction_error, decimal=1) # re-embed a noisy version of X using the transform method noise = rng.randn(*X.shape) / 100 X_reembedded = clf.transform(X + noise) assert linalg.norm(X_reembedded - clf.embedding_) < tol def test_lle_manifold(): rng = np.random.RandomState(0) # similar test on a slightly more complex manifold X = np.array(list(product(np.arange(18), repeat=2))) X = np.c_[X, X[:, 0] ** 2 / 18] X = X + 1e-10 * rng.uniform(size=X.shape) n_components = 2 for method in ["standard", "hessian", "modified", "ltsa"]: clf = manifold.LocallyLinearEmbedding(n_neighbors=6, n_components=n_components, method=method, random_state=0) tol = 1.5 if method == "standard" else 3 N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X) assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) clf.fit(X) assert clf.embedding_.shape[1] == n_components reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 details = ("solver: %s, method: %s" % (solver, method)) assert reconstruction_error < tol, details assert (np.abs(clf.reconstruction_error_ - reconstruction_error) < tol * reconstruction_error), details # Test the error raised when parameter passed to lle is invalid def test_lle_init_parameters(): X = np.random.rand(5, 3) clf = manifold.LocallyLinearEmbedding(eigen_solver="error") msg = "unrecognized eigen_solver 'error'" assert_raise_message(ValueError, msg, clf.fit, X) clf = manifold.LocallyLinearEmbedding(method="error") msg = "unrecognized method 'error'" assert_raise_message(ValueError, msg, clf.fit, X) def test_pipeline(): # check that LocallyLinearEmbedding works fine as a Pipeline # only checks that no error is raised. # TODO check that it actually does something useful from sklearn import pipeline, datasets X, y = datasets.make_blobs(random_state=0) clf = pipeline.Pipeline( [('filter', manifold.LocallyLinearEmbedding(random_state=0)), ('clf', neighbors.KNeighborsClassifier())]) clf.fit(X, y) assert .9 < clf.score(X, y) # Test the error raised when the weight matrix is singular def test_singular_matrix(): M = np.ones((10, 3)) f = ignore_warnings with pytest.raises(ValueError): f(manifold.locally_linear_embedding(M, n_neighbors=2, n_components=1, method='standard', eigen_solver='arpack')) # regression test for #6033 def test_integer_input(): rand = np.random.RandomState(0) X = rand.randint(0, 100, size=(20, 3)) for method in ["standard", "hessian", "modified", "ltsa"]: clf = manifold.LocallyLinearEmbedding(method=method, n_neighbors=10) clf.fit(X) # this previously raised a TypeError