Inzynierka/Lib/site-packages/sklearn/neighbors/tests/test_nearest_centroid.py

"""
Testing for the nearest centroid module.
"""
import numpy as np
import pytest
from scipy import sparse as sp
from numpy.testing import assert_array_equal

from sklearn.neighbors import NearestCentroid
from sklearn import datasets

# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
X_csr = sp.csr_matrix(X)  # Sparse matrix
y = [-1, -1, -1, 1, 1, 1]
T = [[-1, -1], [2, 2], [3, 2]]
T_csr = sp.csr_matrix(T)
true_result = [-1, 1, 1]

# also load the iris dataset
# and randomly permute it
iris = datasets.load_iris()
rng = np.random.RandomState(1)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]


def test_classification_toy():
    # Check classification on a toy dataset, including sparse versions.
    clf = NearestCentroid()
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # Same test, but with a sparse matrix to fit and test.
    clf = NearestCentroid()
    clf.fit(X_csr, y)
    assert_array_equal(clf.predict(T_csr), true_result)

    # Fit with sparse, test with non-sparse
    clf = NearestCentroid()
    clf.fit(X_csr, y)
    assert_array_equal(clf.predict(T), true_result)

    # Fit with non-sparse, test with sparse
    clf = NearestCentroid()
    clf.fit(X, y)
    assert_array_equal(clf.predict(T_csr), true_result)

    # Fit and predict with non-CSR sparse matrices
    clf = NearestCentroid()
    clf.fit(X_csr.tocoo(), y)
    assert_array_equal(clf.predict(T_csr.tolil()), true_result)


def test_iris():
    # Check consistency on dataset iris.
    for metric in ("euclidean", "cosine"):
        clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)
        score = np.mean(clf.predict(iris.data) == iris.target)
        assert score > 0.9, "Failed with score = " + str(score)


def test_iris_shrinkage():
    # Check consistency on dataset iris, when using shrinkage.
    for metric in ("euclidean", "cosine"):
        for shrink_threshold in [None, 0.1, 0.5]:
            clf = NearestCentroid(metric=metric, shrink_threshold=shrink_threshold)
            clf = clf.fit(iris.data, iris.target)
            score = np.mean(clf.predict(iris.data) == iris.target)
            assert score > 0.8, "Failed with score = " + str(score)


def test_pickle():
    import pickle

    # classification
    obj = NearestCentroid()
    obj.fit(iris.data, iris.target)
    score = obj.score(iris.data, iris.target)
    s = pickle.dumps(obj)

    obj2 = pickle.loads(s)
    assert type(obj2) == obj.__class__
    score2 = obj2.score(iris.data, iris.target)
    assert_array_equal(
        score,
        score2,
        "Failed to generate same score after pickling (classification).",
    )


def test_shrinkage_correct():
    # Ensure that the shrinking is correct.
    # The expected result is calculated by R (pamr),
    # which is implemented by the author of the original paper.
    # (One need to modify the code to output the new centroid in pamr.predict)

    X = np.array([[0, 1], [1, 0], [1, 1], [2, 0], [6, 8]])
    y = np.array([1, 1, 2, 2, 2])
    clf = NearestCentroid(shrink_threshold=0.1)
    clf.fit(X, y)
    expected_result = np.array([[0.7787310, 0.8545292], [2.814179, 2.763647]])
    np.testing.assert_array_almost_equal(clf.centroids_, expected_result)


def test_shrinkage_threshold_decoded_y():
    clf = NearestCentroid(shrink_threshold=0.01)
    y_ind = np.asarray(y)
    y_ind[y_ind == -1] = 0
    clf.fit(X, y_ind)
    centroid_encoded = clf.centroids_
    clf.fit(X, y)
    assert_array_equal(centroid_encoded, clf.centroids_)


def test_predict_translated_data():
    # Test that NearestCentroid gives same results on translated data

    rng = np.random.RandomState(0)
    X = rng.rand(50, 50)
    y = rng.randint(0, 3, 50)
    noise = rng.rand(50)
    clf = NearestCentroid(shrink_threshold=0.1)
    clf.fit(X, y)
    y_init = clf.predict(X)
    clf = NearestCentroid(shrink_threshold=0.1)
    X_noise = X + noise
    clf.fit(X_noise, y)
    y_translate = clf.predict(X_noise)
    assert_array_equal(y_init, y_translate)


def test_manhattan_metric():
    # Test the manhattan metric.

    clf = NearestCentroid(metric="manhattan")
    clf.fit(X, y)
    dense_centroid = clf.centroids_
    clf.fit(X_csr, y)
    assert_array_equal(clf.centroids_, dense_centroid)
    assert_array_equal(dense_centroid, [[-1, -1], [1, 1]])


def test_features_zero_var():
    # Test that features with 0 variance throw error

    X = np.empty((10, 2))
    X[:, 0] = -0.13725701
    X[:, 1] = -0.9853293
    y = np.zeros((10))
    y[0] = 1

    clf = NearestCentroid(shrink_threshold=0.1)
    with pytest.raises(ValueError):
        clf.fit(X, y)
first commit 2023-06-02 12:51:02 +02:00			`"""`
			`Testing for the nearest centroid module.`
			`"""`
			`import numpy as np`
			`import pytest`
			`from scipy import sparse as sp`
			`from numpy.testing import assert_array_equal`

			`from sklearn.neighbors import NearestCentroid`
			`from sklearn import datasets`

			`# toy sample`
			`X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]`
			`X_csr = sp.csr_matrix(X) # Sparse matrix`
			`y = [-1, -1, -1, 1, 1, 1]`
			`T = [[-1, -1], [2, 2], [3, 2]]`
			`T_csr = sp.csr_matrix(T)`
			`true_result = [-1, 1, 1]`

			`# also load the iris dataset`
			`# and randomly permute it`
			`iris = datasets.load_iris()`
			`rng = np.random.RandomState(1)`
			`perm = rng.permutation(iris.target.size)`
			`iris.data = iris.data[perm]`
			`iris.target = iris.target[perm]`


			`def test_classification_toy():`
			`# Check classification on a toy dataset, including sparse versions.`
			`clf = NearestCentroid()`
			`clf.fit(X, y)`
			`assert_array_equal(clf.predict(T), true_result)`

			`# Same test, but with a sparse matrix to fit and test.`
			`clf = NearestCentroid()`
			`clf.fit(X_csr, y)`
			`assert_array_equal(clf.predict(T_csr), true_result)`

			`# Fit with sparse, test with non-sparse`
			`clf = NearestCentroid()`
			`clf.fit(X_csr, y)`
			`assert_array_equal(clf.predict(T), true_result)`

			`# Fit with non-sparse, test with sparse`
			`clf = NearestCentroid()`
			`clf.fit(X, y)`
			`assert_array_equal(clf.predict(T_csr), true_result)`

			`# Fit and predict with non-CSR sparse matrices`
			`clf = NearestCentroid()`
			`clf.fit(X_csr.tocoo(), y)`
			`assert_array_equal(clf.predict(T_csr.tolil()), true_result)`


			`def test_iris():`
			`# Check consistency on dataset iris.`
			`for metric in ("euclidean", "cosine"):`
			`clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)`
			`score = np.mean(clf.predict(iris.data) == iris.target)`
			`assert score > 0.9, "Failed with score = " + str(score)`


			`def test_iris_shrinkage():`
			`# Check consistency on dataset iris, when using shrinkage.`
			`for metric in ("euclidean", "cosine"):`
			`for shrink_threshold in [None, 0.1, 0.5]:`
			`clf = NearestCentroid(metric=metric, shrink_threshold=shrink_threshold)`
			`clf = clf.fit(iris.data, iris.target)`
			`score = np.mean(clf.predict(iris.data) == iris.target)`
			`assert score > 0.8, "Failed with score = " + str(score)`


			`def test_pickle():`
			`import pickle`

			`# classification`
			`obj = NearestCentroid()`
			`obj.fit(iris.data, iris.target)`
			`score = obj.score(iris.data, iris.target)`
			`s = pickle.dumps(obj)`

			`obj2 = pickle.loads(s)`
			`assert type(obj2) == obj.__class__`
			`score2 = obj2.score(iris.data, iris.target)`
			`assert_array_equal(`
			`score,`
			`score2,`
			`"Failed to generate same score after pickling (classification).",`
			`)`


			`def test_shrinkage_correct():`
			`# Ensure that the shrinking is correct.`
			`# The expected result is calculated by R (pamr),`
			`# which is implemented by the author of the original paper.`
			`# (One need to modify the code to output the new centroid in pamr.predict)`

			`X = np.array([[0, 1], [1, 0], [1, 1], [2, 0], [6, 8]])`
			`y = np.array([1, 1, 2, 2, 2])`
			`clf = NearestCentroid(shrink_threshold=0.1)`
			`clf.fit(X, y)`
			`expected_result = np.array([[0.7787310, 0.8545292], [2.814179, 2.763647]])`
			`np.testing.assert_array_almost_equal(clf.centroids_, expected_result)`


			`def test_shrinkage_threshold_decoded_y():`
			`clf = NearestCentroid(shrink_threshold=0.01)`
			`y_ind = np.asarray(y)`
			`y_ind[y_ind == -1] = 0`
			`clf.fit(X, y_ind)`
			`centroid_encoded = clf.centroids_`
			`clf.fit(X, y)`
			`assert_array_equal(centroid_encoded, clf.centroids_)`


			`def test_predict_translated_data():`
			`# Test that NearestCentroid gives same results on translated data`

			`rng = np.random.RandomState(0)`
			`X = rng.rand(50, 50)`
			`y = rng.randint(0, 3, 50)`
			`noise = rng.rand(50)`
			`clf = NearestCentroid(shrink_threshold=0.1)`
			`clf.fit(X, y)`
			`y_init = clf.predict(X)`
			`clf = NearestCentroid(shrink_threshold=0.1)`
			`X_noise = X + noise`
			`clf.fit(X_noise, y)`
			`y_translate = clf.predict(X_noise)`
			`assert_array_equal(y_init, y_translate)`


			`def test_manhattan_metric():`
			`# Test the manhattan metric.`

			`clf = NearestCentroid(metric="manhattan")`
			`clf.fit(X, y)`
			`dense_centroid = clf.centroids_`
			`clf.fit(X_csr, y)`
			`assert_array_equal(clf.centroids_, dense_centroid)`
			`assert_array_equal(dense_centroid, [[-1, -1], [1, 1]])`


			`def test_features_zero_var():`
			`# Test that features with 0 variance throw error`

			`X = np.empty((10, 2))`
			`X[:, 0] = -0.13725701`
			`X[:, 1] = -0.9853293`
			`y = np.zeros((10))`
			`y[0] = 1`

			`clf = NearestCentroid(shrink_threshold=0.1)`
			`with pytest.raises(ValueError):`
			`clf.fit(X, y)`