Inzynierka/Lib/site-packages/sklearn/metrics/cluster/tests/test_supervised.py

import warnings

import numpy as np
import pytest

from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.cluster import pair_confusion_matrix
from sklearn.metrics.cluster import entropy
from sklearn.metrics.cluster import expected_mutual_information
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster._supervised import _generalized_average
from sklearn.metrics.cluster._supervised import check_clusterings

from sklearn.utils import assert_all_finite
from sklearn.utils._testing import assert_almost_equal
from numpy.testing import assert_array_equal, assert_array_almost_equal, assert_allclose


score_funcs = [
    adjusted_rand_score,
    rand_score,
    homogeneity_score,
    completeness_score,
    v_measure_score,
    adjusted_mutual_info_score,
    normalized_mutual_info_score,
]


def test_error_messages_on_wrong_input():
    for score_func in score_funcs:
        expected = (
            r"Found input variables with inconsistent numbers " r"of samples: \[2, 3\]"
        )
        with pytest.raises(ValueError, match=expected):
            score_func([0, 1], [1, 1, 1])

        expected = r"labels_true must be 1D: shape is \(2"
        with pytest.raises(ValueError, match=expected):
            score_func([[0, 1], [1, 0]], [1, 1, 1])

        expected = r"labels_pred must be 1D: shape is \(2"
        with pytest.raises(ValueError, match=expected):
            score_func([0, 1, 0], [[1, 1], [0, 0]])


def test_generalized_average():
    a, b = 1, 2
    methods = ["min", "geometric", "arithmetic", "max"]
    means = [_generalized_average(a, b, method) for method in methods]
    assert means[0] <= means[1] <= means[2] <= means[3]
    c, d = 12, 12
    means = [_generalized_average(c, d, method) for method in methods]
    assert means[0] == means[1] == means[2] == means[3]


def test_perfect_matches():
    for score_func in score_funcs:
        assert score_func([], []) == pytest.approx(1.0)
        assert score_func([0], [1]) == pytest.approx(1.0)
        assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
        assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
        assert score_func([0.0, 1.0, 0.0], [42.0, 7.0, 42.0]) == pytest.approx(1.0)
        assert score_func([0.0, 1.0, 2.0], [42.0, 7.0, 2.0]) == pytest.approx(1.0)
        assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
    score_funcs_with_changing_means = [
        normalized_mutual_info_score,
        adjusted_mutual_info_score,
    ]
    means = {"min", "geometric", "arithmetic", "max"}
    for score_func in score_funcs_with_changing_means:
        for mean in means:
            assert score_func([], [], average_method=mean) == pytest.approx(1.0)
            assert score_func([0], [1], average_method=mean) == pytest.approx(1.0)
            assert score_func(
                [0, 0, 0], [0, 0, 0], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0, 1, 0], [42, 7, 42], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0.0, 1.0, 0.0], [42.0, 7.0, 42.0], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0.0, 1.0, 2.0], [42.0, 7.0, 2.0], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0, 1, 2], [42, 7, 2], average_method=mean
            ) == pytest.approx(1.0)


def test_homogeneous_but_not_complete_labeling():
    # homogeneous but not complete clustering
    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 2, 2])
    assert_almost_equal(h, 1.00, 2)
    assert_almost_equal(c, 0.69, 2)
    assert_almost_equal(v, 0.81, 2)


def test_complete_but_not_homogeneous_labeling():
    # complete but not homogeneous clustering
    h, c, v = homogeneity_completeness_v_measure([0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 1, 1])
    assert_almost_equal(h, 0.58, 2)
    assert_almost_equal(c, 1.00, 2)
    assert_almost_equal(v, 0.73, 2)


def test_not_complete_and_not_homogeneous_labeling():
    # neither complete nor homogeneous but not so bad either
    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)


def test_beta_parameter():
    # test for when beta passed to
    # homogeneity_completeness_v_measure
    # and v_measure_score
    beta_test = 0.2
    h_test = 0.67
    c_test = 0.42
    v_test = (1 + beta_test) * h_test * c_test / (beta_test * h_test + c_test)

    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test
    )
    assert_almost_equal(h, h_test, 2)
    assert_almost_equal(c, c_test, 2)
    assert_almost_equal(v, v_test, 2)

    v = v_measure_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test)
    assert_almost_equal(v, v_test, 2)


def test_non_consecutive_labels():
    # regression tests for labels with gaps
    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 2, 2, 2], [0, 1, 0, 1, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)

    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)

    ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
    ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
    assert_almost_equal(ari_1, 0.24, 2)
    assert_almost_equal(ari_2, 0.24, 2)

    ri_1 = rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
    ri_2 = rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
    assert_almost_equal(ri_1, 0.66, 2)
    assert_almost_equal(ri_2, 0.66, 2)


def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10, seed=42):
    # Compute score for random uniform cluster labelings
    random_labels = np.random.RandomState(seed).randint
    scores = np.zeros((len(k_range), n_runs))
    for i, k in enumerate(k_range):
        for j in range(n_runs):
            labels_a = random_labels(low=0, high=k, size=n_samples)
            labels_b = random_labels(low=0, high=k, size=n_samples)
            scores[i, j] = score_func(labels_a, labels_b)
    return scores


def test_adjustment_for_chance():
    # Check that adjusted scores are almost zero on random labels
    n_clusters_range = [2, 10, 50, 90]
    n_samples = 100
    n_runs = 10

    scores = uniform_labelings_scores(
        adjusted_rand_score, n_samples, n_clusters_range, n_runs
    )

    max_abs_scores = np.abs(scores).max(axis=1)
    assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)


def test_adjusted_mutual_info_score():
    # Compute the Adjusted Mutual Information and test against known values
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    # Mutual information
    mi = mutual_info_score(labels_a, labels_b)
    assert_almost_equal(mi, 0.41022, 5)
    # with provided sparse contingency
    C = contingency_matrix(labels_a, labels_b, sparse=True)
    mi = mutual_info_score(labels_a, labels_b, contingency=C)
    assert_almost_equal(mi, 0.41022, 5)
    # with provided dense contingency
    C = contingency_matrix(labels_a, labels_b)
    mi = mutual_info_score(labels_a, labels_b, contingency=C)
    assert_almost_equal(mi, 0.41022, 5)
    # Expected mutual information
    n_samples = C.sum()
    emi = expected_mutual_information(C, n_samples)
    assert_almost_equal(emi, 0.15042, 5)
    # Adjusted mutual information
    ami = adjusted_mutual_info_score(labels_a, labels_b)
    assert_almost_equal(ami, 0.27821, 5)
    ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
    assert ami == pytest.approx(1.0)
    # Test with a very large array
    a110 = np.array([list(labels_a) * 110]).flatten()
    b110 = np.array([list(labels_b) * 110]).flatten()
    ami = adjusted_mutual_info_score(a110, b110)
    assert_almost_equal(ami, 0.38, 2)


def test_expected_mutual_info_overflow():
    # Test for regression where contingency cell exceeds 2**16
    # leading to overflow in np.outer, resulting in EMI > 1
    assert expected_mutual_information(np.array([[70000]]), 70000) <= 1


def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test overflow in mutual_info_classif and fowlkes_mallows_score
    x = np.array(
        [1] * (52632 + 2529)
        + [2] * (14660 + 793)
        + [3] * (3271 + 204)
        + [4] * (814 + 39)
        + [5] * (316 + 20)
    )
    y = np.array(
        [0] * 52632
        + [1] * 2529
        + [0] * 14660
        + [1] * 793
        + [0] * 3271
        + [1] * 204
        + [0] * 814
        + [1] * 39
        + [0] * 316
        + [1] * 20
    )

    assert_all_finite(mutual_info_score(x, y))
    assert_all_finite(fowlkes_mallows_score(x, y))


def test_entropy():
    ent = entropy([0, 0, 42.0])
    assert_almost_equal(ent, 0.6365141, 5)
    assert_almost_equal(entropy([]), 1)
    assert entropy([1, 1, 1, 1]) == 0


def test_contingency_matrix():
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    C = contingency_matrix(labels_a, labels_b)
    C2 = np.histogram2d(labels_a, labels_b, bins=(np.arange(1, 5), np.arange(1, 5)))[0]
    assert_array_almost_equal(C, C2)
    C = contingency_matrix(labels_a, labels_b, eps=0.1)
    assert_array_almost_equal(C, C2 + 0.1)


def test_contingency_matrix_sparse():
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    C = contingency_matrix(labels_a, labels_b)
    C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
    assert_array_almost_equal(C, C_sparse)
    with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
        contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)


def test_exactly_zero_info_score():
    # Check numerical stability when information is exactly zero
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int), np.arange(i, dtype=int))
        assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
        assert v_measure_score(labels_a, labels_b) == pytest.approx(0.0)
        assert adjusted_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
        assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
        for method in ["min", "geometric", "arithmetic", "max"]:
            assert adjusted_mutual_info_score(
                labels_a, labels_b, average_method=method
            ) == pytest.approx(0.0)
            assert normalized_mutual_info_score(
                labels_a, labels_b, average_method=method
            ) == pytest.approx(0.0)


def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (
            random_state.randint(0, 10, i),
            random_state.randint(0, 10, i),
        )
        assert_almost_equal(
            v_measure_score(labels_a, labels_b),
            2.0
            * mutual_info_score(labels_a, labels_b)
            / (entropy(labels_a) + entropy(labels_b)),
            0,
        )
        avg = "arithmetic"
        assert_almost_equal(
            v_measure_score(labels_a, labels_b),
            normalized_mutual_info_score(labels_a, labels_b, average_method=avg),
        )


def test_fowlkes_mallows_score():
    # General case
    score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])
    assert_almost_equal(score, 4.0 / np.sqrt(12.0 * 6.0))

    # Perfect match but where the label names changed
    perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0])
    assert_almost_equal(perfect_score, 1.0)

    # Worst case
    worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5])
    assert_almost_equal(worst_score, 0.0)


def test_fowlkes_mallows_score_properties():
    # handcrafted example
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])
    expected = 1.0 / np.sqrt((1.0 + 3.0) * (1.0 + 2.0))
    # FMI = TP / sqrt((TP + FP) * (TP + FN))

    score_original = fowlkes_mallows_score(labels_a, labels_b)
    assert_almost_equal(score_original, expected)

    # symmetric property
    score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
    assert_almost_equal(score_symmetric, expected)

    # permutation property
    score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
    assert_almost_equal(score_permuted, expected)

    # symmetric and permutation(both together)
    score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
    assert_almost_equal(score_both, expected)


@pytest.mark.parametrize(
    "labels_true, labels_pred",
    [
        (["a"] * 6, [1, 1, 0, 0, 1, 1]),
        ([1] * 6, [1, 1, 0, 0, 1, 1]),
        ([1, 1, 0, 0, 1, 1], ["a"] * 6),
        ([1, 1, 0, 0, 1, 1], [1] * 6),
        (["a"] * 6, ["a"] * 6),
    ],
)
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
    # Check that MI = 0 when one or both labelling are constant
    # non-regression test for #16355
    assert mutual_info_score(labels_true, labels_pred) == 0


def test_check_clustering_error():
    # Test warning message for continuous values
    rng = np.random.RandomState(42)
    noise = rng.rand(500)
    wavelength = np.linspace(0.01, 1, 500) * 1e-6
    msg = (
        "Clustering metrics expects discrete values but received "
        "continuous values for label, and continuous values for "
        "target"
    )

    with pytest.warns(UserWarning, match=msg):
        check_clusterings(wavelength, noise)


def test_pair_confusion_matrix_fully_dispersed():
    # edge case: every element is its own cluster
    N = 100
    clustering1 = list(range(N))
    clustering2 = clustering1
    expected = np.array([[N * (N - 1), 0], [0, 0]])
    assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)


def test_pair_confusion_matrix_single_cluster():
    # edge case: only one cluster
    N = 100
    clustering1 = np.zeros((N,))
    clustering2 = clustering1
    expected = np.array([[0, 0], [0, N * (N - 1)]])
    assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)


def test_pair_confusion_matrix():
    # regular case: different non-trivial clusterings
    n = 10
    N = n**2
    clustering1 = np.hstack([[i + 1] * n for i in range(n)])
    clustering2 = np.hstack([[i + 1] * (n + 1) for i in range(n)])[:N]
    # basic quadratic implementation
    expected = np.zeros(shape=(2, 2), dtype=np.int64)
    for i in range(len(clustering1)):
        for j in range(len(clustering2)):
            if i != j:
                same_cluster_1 = int(clustering1[i] == clustering1[j])
                same_cluster_2 = int(clustering2[i] == clustering2[j])
                expected[same_cluster_1, same_cluster_2] += 1
    assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)


@pytest.mark.parametrize(
    "clustering1, clustering2",
    [(list(range(100)), list(range(100))), (np.zeros((100,)), np.zeros((100,)))],
)
def test_rand_score_edge_cases(clustering1, clustering2):
    # edge case 1: every element is its own cluster
    # edge case 2: only one cluster
    assert_allclose(rand_score(clustering1, clustering2), 1.0)


def test_rand_score():
    # regular case: different non-trivial clusterings
    clustering1 = [0, 0, 0, 1, 1, 1]
    clustering2 = [0, 1, 0, 1, 2, 2]
    # pair confusion matrix
    D11 = 2 * 2  # ordered pairs (1, 3), (5, 6)
    D10 = 2 * 4  # ordered pairs (1, 2), (2, 3), (4, 5), (4, 6)
    D01 = 2 * 1  # ordered pair (2, 4)
    D00 = 5 * 6 - D11 - D01 - D10  # the remaining pairs
    # rand score
    expected_numerator = D00 + D11
    expected_denominator = D00 + D01 + D10 + D11
    expected = expected_numerator / expected_denominator
    assert_allclose(rand_score(clustering1, clustering2), expected)


def test_adjusted_rand_score_overflow():
    """Check that large amount of data will not lead to overflow in
    `adjusted_rand_score`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20305
    """
    rng = np.random.RandomState(0)
    y_true = rng.randint(0, 2, 100_000, dtype=np.int8)
    y_pred = rng.randint(0, 2, 100_000, dtype=np.int8)
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        adjusted_rand_score(y_true, y_pred)


@pytest.mark.parametrize("average_method", ["min", "arithmetic", "geometric", "max"])
def test_normalized_mutual_info_score_bounded(average_method):
    """Check that nmi returns a score between 0 (included) and 1 (excluded
    for non-perfect match)

    Non-regression test for issue #13836
    """
    labels1 = [0] * 469
    labels2 = [1] + labels1[1:]
    labels3 = [0, 1] + labels1[2:]

    # labels1 is constant. The mutual info between labels1 and any other labelling is 0.
    nmi = normalized_mutual_info_score(labels1, labels2, average_method=average_method)
    assert nmi == 0

    # non constant, non perfect matching labels
    nmi = normalized_mutual_info_score(labels2, labels3, average_method=average_method)
    assert 0 <= nmi < 1
first commit 2023-06-02 12:51:02 +02:00			`import warnings`

			`import numpy as np`
			`import pytest`

			`from sklearn.metrics.cluster import adjusted_mutual_info_score`
			`from sklearn.metrics.cluster import adjusted_rand_score`
			`from sklearn.metrics.cluster import rand_score`
			`from sklearn.metrics.cluster import completeness_score`
			`from sklearn.metrics.cluster import contingency_matrix`
			`from sklearn.metrics.cluster import pair_confusion_matrix`
			`from sklearn.metrics.cluster import entropy`
			`from sklearn.metrics.cluster import expected_mutual_information`
			`from sklearn.metrics.cluster import fowlkes_mallows_score`
			`from sklearn.metrics.cluster import homogeneity_completeness_v_measure`
			`from sklearn.metrics.cluster import homogeneity_score`
			`from sklearn.metrics.cluster import mutual_info_score`
			`from sklearn.metrics.cluster import normalized_mutual_info_score`
			`from sklearn.metrics.cluster import v_measure_score`
			`from sklearn.metrics.cluster._supervised import _generalized_average`
			`from sklearn.metrics.cluster._supervised import check_clusterings`

			`from sklearn.utils import assert_all_finite`
			`from sklearn.utils._testing import assert_almost_equal`
			`from numpy.testing import assert_array_equal, assert_array_almost_equal, assert_allclose`


			`score_funcs = [`
			`adjusted_rand_score,`
			`rand_score,`
			`homogeneity_score,`
			`completeness_score,`
			`v_measure_score,`
			`adjusted_mutual_info_score,`
			`normalized_mutual_info_score,`
			`]`


			`def test_error_messages_on_wrong_input():`
			`for score_func in score_funcs:`
			`expected = (`
			`r"Found input variables with inconsistent numbers " r"of samples: \[2, 3\]"`
			`)`
			`with pytest.raises(ValueError, match=expected):`
			`score_func([0, 1], [1, 1, 1])`

			`expected = r"labels_true must be 1D: shape is \(2"`
			`with pytest.raises(ValueError, match=expected):`
			`score_func([[0, 1], [1, 0]], [1, 1, 1])`

			`expected = r"labels_pred must be 1D: shape is \(2"`
			`with pytest.raises(ValueError, match=expected):`
			`score_func([0, 1, 0], [[1, 1], [0, 0]])`


			`def test_generalized_average():`
			`a, b = 1, 2`
			`methods = ["min", "geometric", "arithmetic", "max"]`
			`means = [_generalized_average(a, b, method) for method in methods]`
			`assert means[0] <= means[1] <= means[2] <= means[3]`
			`c, d = 12, 12`
			`means = [_generalized_average(c, d, method) for method in methods]`
			`assert means[0] == means[1] == means[2] == means[3]`


			`def test_perfect_matches():`
			`for score_func in score_funcs:`
			`assert score_func([], []) == pytest.approx(1.0)`
			`assert score_func([0], [1]) == pytest.approx(1.0)`
			`assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)`
			`assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)`
			`assert score_func([0.0, 1.0, 0.0], [42.0, 7.0, 42.0]) == pytest.approx(1.0)`
			`assert score_func([0.0, 1.0, 2.0], [42.0, 7.0, 2.0]) == pytest.approx(1.0)`
			`assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)`
			`score_funcs_with_changing_means = [`
			`normalized_mutual_info_score,`
			`adjusted_mutual_info_score,`
			`]`
			`means = {"min", "geometric", "arithmetic", "max"}`
			`for score_func in score_funcs_with_changing_means:`
			`for mean in means:`
			`assert score_func([], [], average_method=mean) == pytest.approx(1.0)`
			`assert score_func([0], [1], average_method=mean) == pytest.approx(1.0)`
			`assert score_func(`
			`[0, 0, 0], [0, 0, 0], average_method=mean`
			`) == pytest.approx(1.0)`
			`assert score_func(`
			`[0, 1, 0], [42, 7, 42], average_method=mean`
			`) == pytest.approx(1.0)`
			`assert score_func(`
			`[0.0, 1.0, 0.0], [42.0, 7.0, 42.0], average_method=mean`
			`) == pytest.approx(1.0)`
			`assert score_func(`
			`[0.0, 1.0, 2.0], [42.0, 7.0, 2.0], average_method=mean`
			`) == pytest.approx(1.0)`
			`assert score_func(`
			`[0, 1, 2], [42, 7, 2], average_method=mean`
			`) == pytest.approx(1.0)`


			`def test_homogeneous_but_not_complete_labeling():`
			`# homogeneous but not complete clustering`
			`h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 2, 2])`
			`assert_almost_equal(h, 1.00, 2)`
			`assert_almost_equal(c, 0.69, 2)`
			`assert_almost_equal(v, 0.81, 2)`


			`def test_complete_but_not_homogeneous_labeling():`
			`# complete but not homogeneous clustering`
			`h, c, v = homogeneity_completeness_v_measure([0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 1, 1])`
			`assert_almost_equal(h, 0.58, 2)`
			`assert_almost_equal(c, 1.00, 2)`
			`assert_almost_equal(v, 0.73, 2)`


			`def test_not_complete_and_not_homogeneous_labeling():`
			`# neither complete nor homogeneous but not so bad either`
			`h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])`
			`assert_almost_equal(h, 0.67, 2)`
			`assert_almost_equal(c, 0.42, 2)`
			`assert_almost_equal(v, 0.52, 2)`


			`def test_beta_parameter():`
			`# test for when beta passed to`
			`# homogeneity_completeness_v_measure`
			`# and v_measure_score`
			`beta_test = 0.2`
			`h_test = 0.67`
			`c_test = 0.42`
			`v_test = (1 + beta_test) * h_test * c_test / (beta_test * h_test + c_test)`

			`h, c, v = homogeneity_completeness_v_measure(`
			`[0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test`
			`)`
			`assert_almost_equal(h, h_test, 2)`
			`assert_almost_equal(c, c_test, 2)`
			`assert_almost_equal(v, v_test, 2)`

			`v = v_measure_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test)`
			`assert_almost_equal(v, v_test, 2)`


			`def test_non_consecutive_labels():`
			`# regression tests for labels with gaps`
			`h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 2, 2, 2], [0, 1, 0, 1, 2, 2])`
			`assert_almost_equal(h, 0.67, 2)`
			`assert_almost_equal(c, 0.42, 2)`
			`assert_almost_equal(v, 0.52, 2)`

			`h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])`
			`assert_almost_equal(h, 0.67, 2)`
			`assert_almost_equal(c, 0.42, 2)`
			`assert_almost_equal(v, 0.52, 2)`

			`ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])`
			`ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])`
			`assert_almost_equal(ari_1, 0.24, 2)`
			`assert_almost_equal(ari_2, 0.24, 2)`

			`ri_1 = rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])`
			`ri_2 = rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])`
			`assert_almost_equal(ri_1, 0.66, 2)`
			`assert_almost_equal(ri_2, 0.66, 2)`


			`def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10, seed=42):`
			`# Compute score for random uniform cluster labelings`
			`random_labels = np.random.RandomState(seed).randint`
			`scores = np.zeros((len(k_range), n_runs))`
			`for i, k in enumerate(k_range):`
			`for j in range(n_runs):`
			`labels_a = random_labels(low=0, high=k, size=n_samples)`
			`labels_b = random_labels(low=0, high=k, size=n_samples)`
			`scores[i, j] = score_func(labels_a, labels_b)`
			`return scores`


			`def test_adjustment_for_chance():`
			`# Check that adjusted scores are almost zero on random labels`
			`n_clusters_range = [2, 10, 50, 90]`
			`n_samples = 100`
			`n_runs = 10`

			`scores = uniform_labelings_scores(`
			`adjusted_rand_score, n_samples, n_clusters_range, n_runs`
			`)`

			`max_abs_scores = np.abs(scores).max(axis=1)`
			`assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)`


			`def test_adjusted_mutual_info_score():`
			`# Compute the Adjusted Mutual Information and test against known values`
			`labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])`
			`labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])`
			`# Mutual information`
			`mi = mutual_info_score(labels_a, labels_b)`
			`assert_almost_equal(mi, 0.41022, 5)`
			`# with provided sparse contingency`
			`C = contingency_matrix(labels_a, labels_b, sparse=True)`
			`mi = mutual_info_score(labels_a, labels_b, contingency=C)`
			`assert_almost_equal(mi, 0.41022, 5)`
			`# with provided dense contingency`
			`C = contingency_matrix(labels_a, labels_b)`
			`mi = mutual_info_score(labels_a, labels_b, contingency=C)`
			`assert_almost_equal(mi, 0.41022, 5)`
			`# Expected mutual information`
			`n_samples = C.sum()`
			`emi = expected_mutual_information(C, n_samples)`
			`assert_almost_equal(emi, 0.15042, 5)`
			`# Adjusted mutual information`
			`ami = adjusted_mutual_info_score(labels_a, labels_b)`
			`assert_almost_equal(ami, 0.27821, 5)`
			`ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])`
			`assert ami == pytest.approx(1.0)`
			`# Test with a very large array`
			`a110 = np.array([list(labels_a) * 110]).flatten()`
			`b110 = np.array([list(labels_b) * 110]).flatten()`
			`ami = adjusted_mutual_info_score(a110, b110)`
			`assert_almost_equal(ami, 0.38, 2)`


			`def test_expected_mutual_info_overflow():`
			`# Test for regression where contingency cell exceeds 2**16`
			`# leading to overflow in np.outer, resulting in EMI > 1`
			`assert expected_mutual_information(np.array([[70000]]), 70000) <= 1`


			`def test_int_overflow_mutual_info_fowlkes_mallows_score():`
			`# Test overflow in mutual_info_classif and fowlkes_mallows_score`
			`x = np.array(`
			`[1] * (52632 + 2529)`
			`+ [2] * (14660 + 793)`
			`+ [3] * (3271 + 204)`
			`+ [4] * (814 + 39)`
			`+ [5] * (316 + 20)`
			`)`
			`y = np.array(`
			`[0] * 52632`
			`+ [1] * 2529`
			`+ [0] * 14660`
			`+ [1] * 793`
			`+ [0] * 3271`
			`+ [1] * 204`
			`+ [0] * 814`
			`+ [1] * 39`
			`+ [0] * 316`
			`+ [1] * 20`
			`)`

			`assert_all_finite(mutual_info_score(x, y))`
			`assert_all_finite(fowlkes_mallows_score(x, y))`


			`def test_entropy():`
			`ent = entropy([0, 0, 42.0])`
			`assert_almost_equal(ent, 0.6365141, 5)`
			`assert_almost_equal(entropy([]), 1)`
			`assert entropy([1, 1, 1, 1]) == 0`


			`def test_contingency_matrix():`
			`labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])`
			`labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])`
			`C = contingency_matrix(labels_a, labels_b)`
			`C2 = np.histogram2d(labels_a, labels_b, bins=(np.arange(1, 5), np.arange(1, 5)))[0]`
			`assert_array_almost_equal(C, C2)`
			`C = contingency_matrix(labels_a, labels_b, eps=0.1)`
			`assert_array_almost_equal(C, C2 + 0.1)`


			`def test_contingency_matrix_sparse():`
			`labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])`
			`labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])`
			`C = contingency_matrix(labels_a, labels_b)`
			`C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()`
			`assert_array_almost_equal(C, C_sparse)`
			`with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):`
			`contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)`


			`def test_exactly_zero_info_score():`
			`# Check numerical stability when information is exactly zero`
			`for i in np.logspace(1, 4, 4).astype(int):`
			`labels_a, labels_b = (np.ones(i, dtype=int), np.arange(i, dtype=int))`
			`assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)`
			`assert v_measure_score(labels_a, labels_b) == pytest.approx(0.0)`
			`assert adjusted_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)`
			`assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)`
			`for method in ["min", "geometric", "arithmetic", "max"]:`
			`assert adjusted_mutual_info_score(`
			`labels_a, labels_b, average_method=method`
			`) == pytest.approx(0.0)`
			`assert normalized_mutual_info_score(`
			`labels_a, labels_b, average_method=method`
			`) == pytest.approx(0.0)`


			`def test_v_measure_and_mutual_information(seed=36):`
			`# Check relation between v_measure, entropy and mutual information`
			`for i in np.logspace(1, 4, 4).astype(int):`
			`random_state = np.random.RandomState(seed)`
			`labels_a, labels_b = (`
			`random_state.randint(0, 10, i),`
			`random_state.randint(0, 10, i),`
			`)`
			`assert_almost_equal(`
			`v_measure_score(labels_a, labels_b),`
			`2.0`
			`* mutual_info_score(labels_a, labels_b)`
			`/ (entropy(labels_a) + entropy(labels_b)),`
			`0,`
			`)`
			`avg = "arithmetic"`
			`assert_almost_equal(`
			`v_measure_score(labels_a, labels_b),`
			`normalized_mutual_info_score(labels_a, labels_b, average_method=avg),`
			`)`


			`def test_fowlkes_mallows_score():`
			`# General case`
			`score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])`
			`assert_almost_equal(score, 4.0 / np.sqrt(12.0 * 6.0))`

			`# Perfect match but where the label names changed`
			`perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0])`
			`assert_almost_equal(perfect_score, 1.0)`

			`# Worst case`
			`worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5])`
			`assert_almost_equal(worst_score, 0.0)`


			`def test_fowlkes_mallows_score_properties():`
			`# handcrafted example`
			`labels_a = np.array([0, 0, 0, 1, 1, 2])`
			`labels_b = np.array([1, 1, 2, 2, 0, 0])`
			`expected = 1.0 / np.sqrt((1.0 + 3.0) * (1.0 + 2.0))`
			`# FMI = TP / sqrt((TP + FP) * (TP + FN))`

			`score_original = fowlkes_mallows_score(labels_a, labels_b)`
			`assert_almost_equal(score_original, expected)`

			`# symmetric property`
			`score_symmetric = fowlkes_mallows_score(labels_b, labels_a)`
			`assert_almost_equal(score_symmetric, expected)`

			`# permutation property`
			`score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)`
			`assert_almost_equal(score_permuted, expected)`

			`# symmetric and permutation(both together)`
			`score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)`
			`assert_almost_equal(score_both, expected)`


			`@pytest.mark.parametrize(`
			`"labels_true, labels_pred",`
			`[`
			`(["a"] * 6, [1, 1, 0, 0, 1, 1]),`
			`([1] * 6, [1, 1, 0, 0, 1, 1]),`
			`([1, 1, 0, 0, 1, 1], ["a"] * 6),`
			`([1, 1, 0, 0, 1, 1], [1] * 6),`
			`(["a"] * 6, ["a"] * 6),`
			`],`
			`)`
			`def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):`
			`# Check that MI = 0 when one or both labelling are constant`
			`# non-regression test for #16355`
			`assert mutual_info_score(labels_true, labels_pred) == 0`


			`def test_check_clustering_error():`
			`# Test warning message for continuous values`
			`rng = np.random.RandomState(42)`
			`noise = rng.rand(500)`
			`wavelength = np.linspace(0.01, 1, 500) * 1e-6`
			`msg = (`
			`"Clustering metrics expects discrete values but received "`
			`"continuous values for label, and continuous values for "`
			`"target"`
			`)`

			`with pytest.warns(UserWarning, match=msg):`
			`check_clusterings(wavelength, noise)`


			`def test_pair_confusion_matrix_fully_dispersed():`
			`# edge case: every element is its own cluster`
			`N = 100`
			`clustering1 = list(range(N))`
			`clustering2 = clustering1`
			`expected = np.array([[N * (N - 1), 0], [0, 0]])`
			`assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)`


			`def test_pair_confusion_matrix_single_cluster():`
			`# edge case: only one cluster`
			`N = 100`
			`clustering1 = np.zeros((N,))`
			`clustering2 = clustering1`
			`expected = np.array([[0, 0], [0, N * (N - 1)]])`
			`assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)`


			`def test_pair_confusion_matrix():`
			`# regular case: different non-trivial clusterings`
			`n = 10`
			`N = n**2`
			`clustering1 = np.hstack([[i + 1] * n for i in range(n)])`
			`clustering2 = np.hstack([[i + 1] * (n + 1) for i in range(n)])[:N]`
			`# basic quadratic implementation`
			`expected = np.zeros(shape=(2, 2), dtype=np.int64)`
			`for i in range(len(clustering1)):`
			`for j in range(len(clustering2)):`
			`if i != j:`
			`same_cluster_1 = int(clustering1[i] == clustering1[j])`
			`same_cluster_2 = int(clustering2[i] == clustering2[j])`
			`expected[same_cluster_1, same_cluster_2] += 1`
			`assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)`


			`@pytest.mark.parametrize(`
			`"clustering1, clustering2",`
			`[(list(range(100)), list(range(100))), (np.zeros((100,)), np.zeros((100,)))],`
			`)`
			`def test_rand_score_edge_cases(clustering1, clustering2):`
			`# edge case 1: every element is its own cluster`
			`# edge case 2: only one cluster`
			`assert_allclose(rand_score(clustering1, clustering2), 1.0)`


			`def test_rand_score():`
			`# regular case: different non-trivial clusterings`
			`clustering1 = [0, 0, 0, 1, 1, 1]`
			`clustering2 = [0, 1, 0, 1, 2, 2]`
			`# pair confusion matrix`
			`D11 = 2 * 2 # ordered pairs (1, 3), (5, 6)`
			`D10 = 2 * 4 # ordered pairs (1, 2), (2, 3), (4, 5), (4, 6)`
			`D01 = 2 * 1 # ordered pair (2, 4)`
			`D00 = 5 * 6 - D11 - D01 - D10 # the remaining pairs`
			`# rand score`
			`expected_numerator = D00 + D11`
			`expected_denominator = D00 + D01 + D10 + D11`
			`expected = expected_numerator / expected_denominator`
			`assert_allclose(rand_score(clustering1, clustering2), expected)`


			`def test_adjusted_rand_score_overflow():`
			`"""Check that large amount of data will not lead to overflow in`
			`adjusted_rand_score`.
			`Non-regression test for:`
			`https://github.com/scikit-learn/scikit-learn/issues/20305`
			`"""`
			`rng = np.random.RandomState(0)`
			`y_true = rng.randint(0, 2, 100_000, dtype=np.int8)`
			`y_pred = rng.randint(0, 2, 100_000, dtype=np.int8)`
			`with warnings.catch_warnings():`
			`warnings.simplefilter("error", RuntimeWarning)`
			`adjusted_rand_score(y_true, y_pred)`


			`@pytest.mark.parametrize("average_method", ["min", "arithmetic", "geometric", "max"])`
			`def test_normalized_mutual_info_score_bounded(average_method):`
			`"""Check that nmi returns a score between 0 (included) and 1 (excluded`
			`for non-perfect match)`

			`Non-regression test for issue #13836`
			`"""`
			`labels1 = [0] * 469`
			`labels2 = [1] + labels1[1:]`
			`labels3 = [0, 1] + labels1[2:]`

			`# labels1 is constant. The mutual info between labels1 and any other labelling is 0.`
			`nmi = normalized_mutual_info_score(labels1, labels2, average_method=average_method)`
			`assert nmi == 0`

			`# non constant, non perfect matching labels`
			`nmi = normalized_mutual_info_score(labels2, labels3, average_method=average_method)`
			`assert 0 <= nmi < 1`