Traktor/myenv/Lib/site-packages/sklearn/utils/tests/test_multiclass.py

from itertools import product

import numpy as np
import pytest
from scipy.sparse import issparse

from sklearn import config_context, datasets
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC
from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
from sklearn.utils._testing import (
    _array_api_for_tests,
    _convert_container,
    assert_allclose,
    assert_array_almost_equal,
    assert_array_equal,
)
from sklearn.utils.estimator_checks import _NotAnArray
from sklearn.utils.fixes import (
    COO_CONTAINERS,
    CSC_CONTAINERS,
    CSR_CONTAINERS,
    DOK_CONTAINERS,
    LIL_CONTAINERS,
)
from sklearn.utils.metaestimators import _safe_split
from sklearn.utils.multiclass import (
    _ovr_decision_function,
    check_classification_targets,
    class_distribution,
    is_multilabel,
    type_of_target,
    unique_labels,
)

# Dense 2x2 integer array; overwriting its first column with zeros leaves
# [[0, 1], [0, 0]]. It is reused below as a multilabel-indicator example.
multilabel_explicit_zero = np.array([[0, 1], [1, 0]])
multilabel_explicit_zero[:, 0] = 0


def _generate_sparse(
    data,
    sparse_containers=tuple(
        COO_CONTAINERS
        + CSC_CONTAINERS
        + CSR_CONTAINERS
        + DOK_CONTAINERS
        + LIL_CONTAINERS
    ),
    dtypes=(bool, int, np.int8, np.uint8, float, np.float32),
):
    """Build one sparse version of ``data`` per (container, dtype) pair.

    Parameters
    ----------
    data : array-like
        Passed as first argument to every sparse container.
    sparse_containers : iterable of callables
        Sparse constructors to apply. Defaults to the COO, CSC, CSR, DOK and
        LIL containers.
    dtypes : iterable
        Values forwarded as ``dtype=`` to each constructor.

    Returns
    -------
    list
        The constructed objects, ordered container first and dtype second
        (all dtypes for the first container, then the next container, ...).
    """
    generated = []
    for make_sparse in sparse_containers:
        for dtype in dtypes:
            generated.append(make_sparse(data, dtype=dtype))
    return generated


# Reference targets grouped by the target type they are expected to have.
# ``test_type_of_target`` asserts that ``type_of_target(example)`` equals the
# dictionary key for every example; the other tests in this module iterate
# over the same groups.
EXAMPLES = {
    "multilabel-indicator": [
        # valid when the data is formatted as sparse or dense, identified
        # by CSR format when the testing takes place
        *_generate_sparse(
            np.random.RandomState(42).randint(2, size=(10, 10)),
            sparse_containers=CSR_CONTAINERS,
            dtypes=(int,),
        ),
        [[0, 1], [1, 0]],
        [[0, 1]],
        *_generate_sparse(
            multilabel_explicit_zero, sparse_containers=CSC_CONTAINERS, dtypes=(int,)
        ),
        # Default arguments: every sparse container combined with every dtype
        *_generate_sparse([[0, 1], [1, 0]]),
        *_generate_sparse([[0, 0], [0, 0]]),
        *_generate_sparse([[0, 1]]),
        # Only valid when data is dense
        [[-1, 1], [1, -1]],
        np.array([[-1, 1], [1, -1]]),
        np.array([[-3, 3], [3, -3]]),
        _NotAnArray(np.array([[-3, 3], [3, -3]])),
    ],
    "multiclass": [
        [1, 0, 2, 2, 1, 4, 2, 4, 4, 4],
        np.array([1, 0, 2]),
        np.array([1, 0, 2], dtype=np.int8),
        np.array([1, 0, 2], dtype=np.uint8),
        np.array([1, 0, 2], dtype=float),
        np.array([1, 0, 2], dtype=np.float32),
        np.array([[1], [0], [2]]),
        _NotAnArray(np.array([1, 0, 2])),
        [0, 1, 2],
        ["a", "b", "c"],
        np.array(["a", "b", "c"]),
        np.array(["a", "b", "c"], dtype=object),
        # NOTE(review): identical to the previous entry
        np.array(["a", "b", "c"], dtype=object),
    ],
    "multiclass-multioutput": [
        [[1, 0, 2, 2], [1, 4, 2, 4]],
        [["a", "b"], ["c", "d"]],
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.int8),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.uint8),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=float),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),
        *_generate_sparse(
            [[1, 0, 2, 2], [1, 4, 2, 4]],
            sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,
            dtypes=(int, np.int8, np.uint8, float, np.float32),
        ),
        np.array([["a", "b"], ["c", "d"]]),
        # NOTE(review): identical to the previous entry
        np.array([["a", "b"], ["c", "d"]]),
        np.array([["a", "b"], ["c", "d"]], dtype=object),
        np.array([[1, 0, 2]]),
        _NotAnArray(np.array([[1, 0, 2]])),
    ],
    "binary": [
        [0, 1],
        [1, 1],
        [],
        [0],
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=bool),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.int8),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.uint8),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=float),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float32),
        np.array([[0], [1]]),
        _NotAnArray(np.array([[0], [1]])),
        [1, -1],
        [3, 5],
        ["a"],
        ["a", "b"],
        ["abc", "def"],
        np.array(["abc", "def"]),
        # NOTE(review): same value as the ["a", "b"] entry a few lines above
        ["a", "b"],
        np.array(["abc", "def"], dtype=object),
    ],
    "continuous": [
        [1e-5],
        [0, 0.5],
        np.array([[0], [0.5]]),
        np.array([[0], [0.5]], dtype=np.float32),
    ],
    "continuous-multioutput": [
        np.array([[0, 0.5], [0.5, 0]]),
        np.array([[0, 0.5], [0.5, 0]], dtype=np.float32),
        np.array([[0, 0.5]]),
        *_generate_sparse(
            [[0, 0.5], [0.5, 0]],
            sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,
            dtypes=(float, np.float32),
        ),
        *_generate_sparse(
            [[0, 0.5]],
            sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,
            dtypes=(float, np.float32),
        ),
    ],
    "unknown": [
        [[]],
        np.array([[]], dtype=object),
        [()],
        # sequence of sequences that weren't supported even before deprecation
        np.array([np.array([]), np.array([1, 2, 3])], dtype=object),
        [np.array([]), np.array([1, 2, 3])],
        [{1, 2, 3}, {1, 2}],
        [frozenset([1, 2, 3]), frozenset([1, 2])],
        # and also confusable as sequences of sequences
        [{0: "a", 1: "b"}, {0: "a"}],
        # ndim 0
        np.array(0),
        # empty second dimension
        np.array([[], []]),
        # 3d
        np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]),
    ],
}

# Examples consumed by ``test_is_multilabel_array_api_compliance``, keyed by
# target type like ``EXAMPLES``. Every entry is passed through ``np.asarray``
# and then ``xp.asarray`` by that test; this collection holds no sparse
# containers and no string labels.
ARRAY_API_EXAMPLES = {
    "multilabel-indicator": [
        np.random.RandomState(42).randint(2, size=(10, 10)),
        [[0, 1], [1, 0]],
        [[0, 1]],
        multilabel_explicit_zero,
        [[0, 0], [0, 0]],
        [[-1, 1], [1, -1]],
        np.array([[-1, 1], [1, -1]]),
        np.array([[-3, 3], [3, -3]]),
        _NotAnArray(np.array([[-3, 3], [3, -3]])),
    ],
    "multiclass": [
        [1, 0, 2, 2, 1, 4, 2, 4, 4, 4],
        np.array([1, 0, 2]),
        np.array([1, 0, 2], dtype=np.int8),
        np.array([1, 0, 2], dtype=np.uint8),
        np.array([1, 0, 2], dtype=float),
        np.array([1, 0, 2], dtype=np.float32),
        np.array([[1], [0], [2]]),
        _NotAnArray(np.array([1, 0, 2])),
        [0, 1, 2],
    ],
    "multiclass-multioutput": [
        [[1, 0, 2, 2], [1, 4, 2, 4]],
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.int8),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.uint8),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=float),
        np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),
        np.array([[1, 0, 2]]),
        _NotAnArray(np.array([[1, 0, 2]])),
    ],
    "binary": [
        [0, 1],
        [1, 1],
        [],
        [0],
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=bool),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.int8),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.uint8),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=float),
        np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float32),
        np.array([[0], [1]]),
        _NotAnArray(np.array([[0], [1]])),
        [1, -1],
        [3, 5],
    ],
    "continuous": [
        [1e-5],
        [0, 0.5],
        np.array([[0], [0.5]]),
        np.array([[0], [0.5]], dtype=np.float32),
    ],
    "continuous-multioutput": [
        np.array([[0, 0.5], [0.5, 0]]),
        np.array([[0, 0.5], [0.5, 0]], dtype=np.float32),
        np.array([[0, 0.5]]),
    ],
    "unknown": [
        # empty inner sequences
        [[]],
        [()],
        # ndim 0
        np.array(0),
        # 3d
        np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]),
    ],
}


# Objects that are not array-likes. The tests below expect both
# ``unique_labels`` and ``type_of_target`` to raise ``ValueError`` for each.
NON_ARRAY_LIKE_EXAMPLES = [
    {1, 2, 3},  # set
    {0: "a", 1: "b"},  # dict with scalar values
    {0: [5], 1: [5]},  # dict with list values
    "abc",  # plain string
    frozenset([1, 2, 3]),
    None,
]

# Legacy "sequence of sequences" multilabel targets. ``test_type_of_target``
# expects ``type_of_target`` to reject each of them with a ``ValueError``
# mentioning the legacy multi-label data representation.
MULTILABEL_SEQUENCES = [
    [[1], [2], [0, 1]],
    # A one-element tuple needs the trailing comma: ``(2)`` would be the plain
    # integer 2 and the entry would no longer be a sequence of tuples.
    [(), (2,), (0, 1)],
    np.array([[], [1, 2]], dtype="object"),
    _NotAnArray(np.array([[], [1, 2]], dtype="object")),
]


def test_unique_labels():
    """Check ``unique_labels`` on hand-written valid and invalid inputs."""
    # Calling without any target is an error.
    with pytest.raises(ValueError):
        unique_labels()

    # Each entry pairs the positional targets with the expected labels.
    valid_cases = [
        # multiclass targets
        ((range(10),), np.arange(10)),
        ((np.arange(10),), np.arange(10)),
        (([4, 0, 2],), np.array([0, 2, 4])),
        # multilabel indicator matrices
        ((np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]]),), np.arange(3)),
        ((np.array([[0, 0, 1], [0, 0, 0]]),), np.arange(3)),
        # several targets passed at once
        (([4, 0, 2], range(5)), np.arange(5)),
        (((0, 1, 2), (0,), (2, 1)), np.arange(3)),
    ]
    for targets, expected in valid_cases:
        assert_array_equal(unique_labels(*targets), expected)

    # Borderline cases with binary indicator matrices: a multiclass target
    # next to an indicator matrix, and two matrices whose column counts differ.
    invalid_cases = [
        ([4, 0, 2], np.ones((5, 5))),
        (np.ones((5, 4)), np.ones((5, 5))),
    ]
    for targets in invalid_cases:
        with pytest.raises(ValueError):
            unique_labels(*targets)

    # Same number of columns but a different number of rows is accepted.
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5))


def test_unique_labels_non_specific():
    """Run ``unique_labels`` over the shared example collections."""
    supported_types = ("binary", "multiclass", "multilabel-indicator")
    unsupported_types = (
        "unknown",
        "continuous",
        "continuous-multioutput",
        "multiclass-multioutput",
    )

    # Smoke test: none of the supported target types may raise.
    for y_type in supported_types:
        for target in EXAMPLES[y_type]:
            unique_labels(target)

    # Inputs that are not array-likes are rejected ...
    for target in NON_ARRAY_LIKE_EXAMPLES:
        with pytest.raises(ValueError):
            unique_labels(target)

    # ... and so are the target types that are not supported at the moment.
    for y_type in unsupported_types:
        for target in EXAMPLES[y_type]:
            with pytest.raises(ValueError):
                unique_labels(target)


def test_unique_labels_mixed_types():
    """``unique_labels`` must refuse inconsistent mixes of targets or labels."""
    # A multilabel target combined with a binary or multiclass one is
    # rejected whichever argument comes first.
    single_output_targets = EXAMPLES["multiclass"] + EXAMPLES["binary"]
    for y_multilabel in EXAMPLES["multilabel-indicator"]:
        for y_multiclass in single_output_targets:
            orderings = (
                (y_multiclass, y_multilabel),
                (y_multilabel, y_multiclass),
            )
            for first, second in orderings:
                with pytest.raises(ValueError):
                    unique_labels(first, second)

    # Inputs mixing string and numeric labels, across or within targets.
    mixed_label_inputs = [
        ([[1, 2]], [["a", "d"]]),
        (["1", 2],),
        ([["1", 2], [1, 3]],),
        ([["1", "2"], [2, 3]],),
    ]
    for targets in mixed_label_inputs:
        with pytest.raises(ValueError):
            unique_labels(*targets)


def test_is_multilabel():
    """Check ``is_multilabel`` on every example, in dense and sparse form."""
    all_sparse_containers = (
        COO_CONTAINERS
        + CSC_CONTAINERS
        + CSR_CONTAINERS
        + DOK_CONTAINERS
        + LIL_CONTAINERS
    )

    for group, group_examples in EXAMPLES.items():
        dense_exp = group == "multilabel-indicator"

        for example in group_examples:
            # Only examples that were explicitly defined as sparse count as
            # valid sparse multilabel-indicators.
            sparse_exp = dense_exp and issparse(example)

            # Sparse examples, and non-empty 2D numeric arrays, are also
            # checked in every sparse format.
            convertible = issparse(example)
            if not convertible and hasattr(example, "__array__"):
                as_array = np.asarray(example)
                convertible = (
                    as_array.ndim == 2
                    and as_array.dtype.kind in "biuf"
                    and as_array.shape[1] > 0
                )

            if convertible:
                sparse_variants = [
                    container(example) for container in all_sparse_containers
                ]
                for exmpl_sparse in sparse_variants:
                    assert sparse_exp == is_multilabel(
                        exmpl_sparse
                    ), f"is_multilabel({exmpl_sparse!r}) should be {sparse_exp}"

            # The dense check runs on the densified version of sparse examples.
            if issparse(example):
                example = example.toarray()

            assert dense_exp == is_multilabel(
                example
            ), f"is_multilabel({example!r}) should be {dense_exp}"


@pytest.mark.parametrize(
    "array_namespace, device, dtype_name",
    yield_namespace_device_dtype_combinations(),
)
def test_is_multilabel_array_api_compliance(array_namespace, device, dtype_name):
    """Check ``is_multilabel`` on array API inputs with dispatch enabled."""
    xp = _array_api_for_tests(array_namespace, device)

    for group, group_examples in ARRAY_API_EXAMPLES.items():
        dense_exp = group == "multilabel-indicator"
        for example in group_examples:
            as_numpy = np.asarray(example)
            if as_numpy.dtype.kind == "f":
                # Floating point examples are converted to the dtype under test.
                as_numpy = np.asarray(example, dtype=dtype_name)
            example = xp.asarray(as_numpy, device=device)

            with config_context(array_api_dispatch=True):
                assert dense_exp == is_multilabel(
                    example
                ), f"is_multilabel({example!r}) should be {dense_exp}"


def test_check_classification_targets():
    """Only classification target types pass ``check_classification_targets``."""
    rejected_types = ("unknown", "continuous", "continuous-multioutput")

    for y_type, examples in EXAMPLES.items():
        if y_type not in rejected_types:
            # Classification targets are accepted silently.
            for example in examples:
                check_classification_targets(example)
            continue

        for example in examples:
            with pytest.raises(ValueError, match="Unknown label type: "):
                check_classification_targets(example)


# @ignore_warnings
def test_type_of_target():
    """Check ``type_of_target`` against the shared example collections."""
    # Every example must be classified as the group it is filed under.
    for group, group_examples in EXAMPLES.items():
        for example in group_examples:
            inferred = type_of_target(example)
            assert inferred == group, "type_of_target(%r) should be %r, got %r" % (
                example,
                group,
                inferred,
            )

    # Inputs that are not array-likes are rejected.
    not_array_like_regex = r"Expected array-like \(array or non-string sequence\).*"
    for example in NON_ARRAY_LIKE_EXAMPLES:
        with pytest.raises(ValueError, match=not_array_like_regex):
            type_of_target(example)

    # The legacy sequence-of-sequences multilabel format is rejected as well.
    legacy_format_msg = (
        "You appear to be using a legacy multi-label data "
        "representation. Sequence of sequences are no longer supported;"
        " use a binary array or sparse matrix instead."
    )
    for example in MULTILABEL_SEQUENCES:
        with pytest.raises(ValueError, match=legacy_format_msg):
            type_of_target(example)


def test_type_of_target_pandas_sparse():
    """``type_of_target`` must reject a pandas ``SparseArray``."""
    pd = pytest.importorskip("pandas")

    sparse_target = pd.arrays.SparseArray([1, np.nan, np.nan, 1, np.nan])
    with pytest.raises(
        ValueError, match="y cannot be class 'SparseSeries' or 'SparseArray'"
    ):
        type_of_target(sparse_target)


def test_type_of_target_pandas_nullable():
    """Check that type_of_target works with pandas nullable dtypes."""
    pd = pytest.importorskip("pandas")

    # One-dimensional targets, for both nullable integer and float series.
    for dtype in ("Int32", "Float32"):
        multiclass_target = pd.Series([1, 0, 2, 3, 4], dtype=dtype)
        assert type_of_target(multiclass_target) == "multiclass"

        binary_target = pd.Series([1, 0, 1, 0], dtype=dtype)
        assert type_of_target(binary_target) == "binary"

    # Two-dimensional targets: (values, nullable dtype, expected target type).
    frame_cases = [
        ([[1.4, 3.1], [3.1, 1.4]], "Float32", "continuous-multioutput"),
        ([[0, 1], [1, 1]], "Int32", "multilabel-indicator"),
        ([[1, 2], [3, 1]], "Int32", "multiclass-multioutput"),
    ]
    for values, dtype, expected_type in frame_cases:
        y_true = pd.DataFrame(values, dtype=dtype)
        assert type_of_target(y_true) == expected_type


@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
def test_unique_labels_pandas_nullable(dtype):
    """Check ``unique_labels`` on a series with a pandas nullable dtype.

    Non-regression test for gh-25634.
    """
    pd = pytest.importorskip("pandas")

    # The true labels use the nullable dtype under test, the predictions a
    # plain numpy integer dtype.
    true_values = [1, 0, 0, 1, 0, 1, 1, 0, 1]
    predicted_values = [0, 0, 1, 1, 0, 1, 1, 1, 1]
    y_true = pd.Series(true_values, dtype=dtype)
    y_predicted = pd.Series(predicted_values, dtype="int64")

    assert_array_equal(unique_labels(y_true, y_predicted), [0, 1])


@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_class_distribution(csc_container):
    """Check ``class_distribution`` on dense and sparse multi-output targets.

    The same 6x4 target is provided once as a dense array and once as a CSC
    container. Both must give the same classes, number of classes and class
    priors for every output, without and with sample weights.
    """
    y = np.array(
        [
            [1, 0, 0, 1],
            [2, 2, 0, 1],
            [1, 3, 0, 1],
            [4, 2, 0, 1],
            [2, 0, 0, 1],
            [1, 3, 0, 1],
        ]
    )
    # Define the sparse matrix with a mix of implicit and explicit zeros:
    # the second column stores one zero explicitly and leaves one implicit,
    # the third column stores nothing at all.
    data = np.array([1, 2, 1, 4, 2, 1, 0, 2, 3, 2, 3, 1, 1, 1, 1, 1, 1])
    indices = np.array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 5, 0, 1, 2, 3, 4, 5])
    indptr = np.array([0, 6, 11, 11, 17])
    y_sp = csc_container((data, indices, indptr), shape=(6, 4))

    classes, n_classes, class_prior = class_distribution(y)
    classes_sp, n_classes_sp, class_prior_sp = class_distribution(y_sp)
    classes_expected = [[1, 2, 4], [0, 2, 3], [0], [1]]
    n_classes_expected = [3, 3, 1, 1]
    class_prior_expected = [[3 / 6, 2 / 6, 1 / 6], [1 / 3, 1 / 3, 1 / 3], [1.0], [1.0]]

    for k in range(y.shape[1]):
        assert_array_almost_equal(classes[k], classes_expected[k])
        assert_array_almost_equal(n_classes[k], n_classes_expected[k])
        assert_array_almost_equal(class_prior[k], class_prior_expected[k])

        assert_array_almost_equal(classes_sp[k], classes_expected[k])
        assert_array_almost_equal(n_classes_sp[k], n_classes_expected[k])
        assert_array_almost_equal(class_prior_sp[k], class_prior_expected[k])

    # Test again with explicit sample weights
    sample_weight = [1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
    (classes, n_classes, class_prior) = class_distribution(y, sample_weight)
    # The sparse results must come from the sparse target, otherwise the
    # weighted sparse code path is never exercised.
    (classes_sp, n_classes_sp, class_prior_sp) = class_distribution(
        y_sp, sample_weight
    )
    class_prior_expected = [[4 / 9, 3 / 9, 2 / 9], [2 / 9, 4 / 9, 3 / 9], [1.0], [1.0]]

    for k in range(y.shape[1]):
        assert_array_almost_equal(classes[k], classes_expected[k])
        assert_array_almost_equal(n_classes[k], n_classes_expected[k])
        assert_array_almost_equal(class_prior[k], class_prior_expected[k])

        assert_array_almost_equal(classes_sp[k], classes_expected[k])
        assert_array_almost_equal(n_classes_sp[k], n_classes_expected[k])
        assert_array_almost_equal(class_prior_sp[k], class_prior_expected[k])


def test_safe_split_with_precomputed_kernel():
    """``_safe_split`` must slice a precomputed kernel like the raw features."""
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    # Linear kernel matrix computed on the full dataset.
    K = np.dot(X, X.T)

    clf = SVC()
    clfp = SVC(kernel="precomputed")

    splitter = ShuffleSplit(test_size=0.25, random_state=0)
    train, test = next(splitter.split(X))

    # Training subset: the kernel restricted to the training samples must
    # equal the kernel recomputed from the training features.
    X_train, y_train = _safe_split(clf, X, y, train)
    K_train, y_train_from_kernel = _safe_split(clfp, K, y, train)
    assert_array_almost_equal(K_train, np.dot(X_train, X_train.T))
    assert_array_almost_equal(y_train, y_train_from_kernel)

    # Test subset: kernel between the test samples and the training samples.
    X_test, y_test = _safe_split(clf, X, y, test, train)
    K_test, y_test_from_kernel = _safe_split(clfp, K, y, test, train)
    assert_array_almost_equal(K_test, np.dot(X_test, X_train.T))
    assert_array_almost_equal(y_test, y_test_from_kernel)


def test_ovr_decision_function():
    """Check properties of the values returned by ``_ovr_decision_function``."""
    n_classes = 3
    predictions = np.array([[0, 1, 1], [0, 1, 0], [0, 1, 1], [0, 1, 1]])
    confidences = np.array(
        [[-1e16, 0, -1e16], [1.0, 2.0, -3.0], [-5.0, 2.0, 5.0], [-0.5, 0.2, 0.5]]
    )

    dec_values = _ovr_decision_function(predictions, confidences, n_classes)

    # The decision values must stay within 0.5 of the votes.
    votes = np.array([[1, 0, 2], [1, 1, 1], [1, 0, 2], [1, 0, 2]])
    assert_allclose(votes, dec_values, atol=0.5)

    # The predicted class is the one with the highest vote, or the highest
    # confidence on a tie: the second sample is a tie that class 1 should win.
    predicted = np.argmax(dec_values, axis=1)
    assert_array_equal(predicted, np.array([2, 1, 2, 2]))

    # The third and fourth samples have the same votes, but the third has the
    # higher confidence, which must show in the decision values.
    assert dec_values[3, 2] < dec_values[2, 2]

    # Subset invariance: evaluating each sample on its own gives the same
    # values as evaluating the whole batch.
    dec_values_one = []
    for sample_predictions, sample_confidences in zip(predictions, confidences):
        single = _ovr_decision_function(
            np.array([sample_predictions]), np.array([sample_confidences]), n_classes
        )
        dec_values_one.append(single[0])

    assert_allclose(dec_values, dec_values_one, atol=1e-6)


# TODO(1.7): Change to ValueError when byte labels is deprecated.
@pytest.mark.parametrize("input_type", ["list", "array"])
def test_labels_in_bytes_format(input_type):
    """Labels given as bytes must trigger a ``FutureWarning``.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/16980
    """
    warn_msg = (
        "Support for labels represented as bytes is deprecated in v1.5 and will"
        " error in v1.7. Convert the labels to a string or integer format."
    )
    bytes_target = _convert_container([b"a", b"b"], input_type)
    with pytest.warns(FutureWarning, match=warn_msg):
        type_of_target(bytes_target)
losowanie zdjec 2024-05-26 05:12:46 +02:00			`from itertools import product`

			`import numpy as np`
			`import pytest`
			`from scipy.sparse import issparse`

			`from sklearn import config_context, datasets`
			`from sklearn.model_selection import ShuffleSplit`
			`from sklearn.svm import SVC`
			`from sklearn.utils._array_api import yield_namespace_device_dtype_combinations`
			`from sklearn.utils._testing import (`
			`_array_api_for_tests,`
			`_convert_container,`
			`assert_allclose,`
			`assert_array_almost_equal,`
			`assert_array_equal,`
			`)`
			`from sklearn.utils.estimator_checks import _NotAnArray`
			`from sklearn.utils.fixes import (`
			`COO_CONTAINERS,`
			`CSC_CONTAINERS,`
			`CSR_CONTAINERS,`
			`DOK_CONTAINERS,`
			`LIL_CONTAINERS,`
			`)`
			`from sklearn.utils.metaestimators import _safe_split`
			`from sklearn.utils.multiclass import (`
			`_ovr_decision_function,`
			`check_classification_targets,`
			`class_distribution,`
			`is_multilabel,`
			`type_of_target,`
			`unique_labels,`
			`)`

			`multilabel_explicit_zero = np.array([[0, 1], [1, 0]])`
			`multilabel_explicit_zero[:, 0] = 0`


			`def _generate_sparse(`
			`data,`
			`sparse_containers=tuple(`
			`COO_CONTAINERS`
			`+ CSC_CONTAINERS`
			`+ CSR_CONTAINERS`
			`+ DOK_CONTAINERS`
			`+ LIL_CONTAINERS`
			`),`
			`dtypes=(bool, int, np.int8, np.uint8, float, np.float32),`
			`):`
			`return [`
			`sparse_container(data, dtype=dtype)`
			`for sparse_container in sparse_containers`
			`for dtype in dtypes`
			`]`


			`EXAMPLES = {`
			`"multilabel-indicator": [`
			`# valid when the data is formatted as sparse or dense, identified`
			`# by CSR format when the testing takes place`
			`*_generate_sparse(`
			`np.random.RandomState(42).randint(2, size=(10, 10)),`
			`sparse_containers=CSR_CONTAINERS,`
			`dtypes=(int,),`
			`),`
			`[[0, 1], [1, 0]],`
			`[[0, 1]],`
			`*_generate_sparse(`
			`multilabel_explicit_zero, sparse_containers=CSC_CONTAINERS, dtypes=(int,)`
			`),`
			`*_generate_sparse([[0, 1], [1, 0]]),`
			`*_generate_sparse([[0, 0], [0, 0]]),`
			`*_generate_sparse([[0, 1]]),`
			`# Only valid when data is dense`
			`[[-1, 1], [1, -1]],`
			`np.array([[-1, 1], [1, -1]]),`
			`np.array([[-3, 3], [3, -3]]),`
			`_NotAnArray(np.array([[-3, 3], [3, -3]])),`
			`],`
			`"multiclass": [`
			`[1, 0, 2, 2, 1, 4, 2, 4, 4, 4],`
			`np.array([1, 0, 2]),`
			`np.array([1, 0, 2], dtype=np.int8),`
			`np.array([1, 0, 2], dtype=np.uint8),`
			`np.array([1, 0, 2], dtype=float),`
			`np.array([1, 0, 2], dtype=np.float32),`
			`np.array([[1], [0], [2]]),`
			`_NotAnArray(np.array([1, 0, 2])),`
			`[0, 1, 2],`
			`["a", "b", "c"],`
			`np.array(["a", "b", "c"]),`
			`np.array(["a", "b", "c"], dtype=object),`
			`np.array(["a", "b", "c"], dtype=object),`
			`],`
			`"multiclass-multioutput": [`
			`[[1, 0, 2, 2], [1, 4, 2, 4]],`
			`[["a", "b"], ["c", "d"]],`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.int8),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.uint8),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=float),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),`
			`*_generate_sparse(`
			`[[1, 0, 2, 2], [1, 4, 2, 4]],`
			`sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,`
			`dtypes=(int, np.int8, np.uint8, float, np.float32),`
			`),`
			`np.array([["a", "b"], ["c", "d"]]),`
			`np.array([["a", "b"], ["c", "d"]]),`
			`np.array([["a", "b"], ["c", "d"]], dtype=object),`
			`np.array([[1, 0, 2]]),`
			`_NotAnArray(np.array([[1, 0, 2]])),`
			`],`
			`"binary": [`
			`[0, 1],`
			`[1, 1],`
			`[],`
			`[0],`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=bool),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.int8),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.uint8),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=float),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float32),`
			`np.array([[0], [1]]),`
			`_NotAnArray(np.array([[0], [1]])),`
			`[1, -1],`
			`[3, 5],`
			`["a"],`
			`["a", "b"],`
			`["abc", "def"],`
			`np.array(["abc", "def"]),`
			`["a", "b"],`
			`np.array(["abc", "def"], dtype=object),`
			`],`
			`"continuous": [`
			`[1e-5],`
			`[0, 0.5],`
			`np.array([[0], [0.5]]),`
			`np.array([[0], [0.5]], dtype=np.float32),`
			`],`
			`"continuous-multioutput": [`
			`np.array([[0, 0.5], [0.5, 0]]),`
			`np.array([[0, 0.5], [0.5, 0]], dtype=np.float32),`
			`np.array([[0, 0.5]]),`
			`*_generate_sparse(`
			`[[0, 0.5], [0.5, 0]],`
			`sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,`
			`dtypes=(float, np.float32),`
			`),`
			`*_generate_sparse(`
			`[[0, 0.5]],`
			`sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,`
			`dtypes=(float, np.float32),`
			`),`
			`],`
			`"unknown": [`
			`[[]],`
			`np.array([[]], dtype=object),`
			`[()],`
			`# sequence of sequences that weren't supported even before deprecation`
			`np.array([np.array([]), np.array([1, 2, 3])], dtype=object),`
			`[np.array([]), np.array([1, 2, 3])],`
			`[{1, 2, 3}, {1, 2}],`
			`[frozenset([1, 2, 3]), frozenset([1, 2])],`
			`# and also confusable as sequences of sequences`
			`[{0: "a", 1: "b"}, {0: "a"}],`
			`# ndim 0`
			`np.array(0),`
			`# empty second dimension`
			`np.array([[], []]),`
			`# 3d`
			`np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]),`
			`],`
			`}`

			`ARRAY_API_EXAMPLES = {`
			`"multilabel-indicator": [`
			`np.random.RandomState(42).randint(2, size=(10, 10)),`
			`[[0, 1], [1, 0]],`
			`[[0, 1]],`
			`multilabel_explicit_zero,`
			`[[0, 0], [0, 0]],`
			`[[-1, 1], [1, -1]],`
			`np.array([[-1, 1], [1, -1]]),`
			`np.array([[-3, 3], [3, -3]]),`
			`_NotAnArray(np.array([[-3, 3], [3, -3]])),`
			`],`
			`"multiclass": [`
			`[1, 0, 2, 2, 1, 4, 2, 4, 4, 4],`
			`np.array([1, 0, 2]),`
			`np.array([1, 0, 2], dtype=np.int8),`
			`np.array([1, 0, 2], dtype=np.uint8),`
			`np.array([1, 0, 2], dtype=float),`
			`np.array([1, 0, 2], dtype=np.float32),`
			`np.array([[1], [0], [2]]),`
			`_NotAnArray(np.array([1, 0, 2])),`
			`[0, 1, 2],`
			`],`
			`"multiclass-multioutput": [`
			`[[1, 0, 2, 2], [1, 4, 2, 4]],`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.int8),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.uint8),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=float),`
			`np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),`
			`np.array([[1, 0, 2]]),`
			`_NotAnArray(np.array([[1, 0, 2]])),`
			`],`
			`"binary": [`
			`[0, 1],`
			`[1, 1],`
			`[],`
			`[0],`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=bool),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.int8),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.uint8),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=float),`
			`np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float32),`
			`np.array([[0], [1]]),`
			`_NotAnArray(np.array([[0], [1]])),`
			`[1, -1],`
			`[3, 5],`
			`],`
			`"continuous": [`
			`[1e-5],`
			`[0, 0.5],`
			`np.array([[0], [0.5]]),`
			`np.array([[0], [0.5]], dtype=np.float32),`
			`],`
			`"continuous-multioutput": [`
			`np.array([[0, 0.5], [0.5, 0]]),`
			`np.array([[0, 0.5], [0.5, 0]], dtype=np.float32),`
			`np.array([[0, 0.5]]),`
			`],`
			`"unknown": [`
			`[[]],`
			`[()],`
			`np.array(0),`
			`np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]),`
			`],`
			`}`


			`NON_ARRAY_LIKE_EXAMPLES = [`
			`{1, 2, 3},`
			`{0: "a", 1: "b"},`
			`{0: [5], 1: [5]},`
			`"abc",`
			`frozenset([1, 2, 3]),`
			`None,`
			`]`

			`MULTILABEL_SEQUENCES = [`
			`[[1], [2], [0, 1]],`
			`[(), (2), (0, 1)],`
			`np.array([[], [1, 2]], dtype="object"),`
			`_NotAnArray(np.array([[], [1, 2]], dtype="object")),`
			`]`


			`def test_unique_labels():`
			`# Empty iterable`
			`with pytest.raises(ValueError):`
			`unique_labels()`

			`# Multiclass problem`
			`assert_array_equal(unique_labels(range(10)), np.arange(10))`
			`assert_array_equal(unique_labels(np.arange(10)), np.arange(10))`
			`assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))`

			`# Multilabel indicator`
			`assert_array_equal(`
			`unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])), np.arange(3)`
			`)`

			`assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])), np.arange(3))`

			`# Several arrays passed`
			`assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5))`
			`assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3))`

			`# Border line case with binary indicator matrix`
			`with pytest.raises(ValueError):`
			`unique_labels([4, 0, 2], np.ones((5, 5)))`
			`with pytest.raises(ValueError):`
			`unique_labels(np.ones((5, 4)), np.ones((5, 5)))`

			`assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5))`


			`def test_unique_labels_non_specific():`
			`# Test unique_labels with a variety of collected examples`

			`# Smoke test for all supported format`
			`for format in ["binary", "multiclass", "multilabel-indicator"]:`
			`for y in EXAMPLES[format]:`
			`unique_labels(y)`

			`# We don't support those format at the moment`
			`for example in NON_ARRAY_LIKE_EXAMPLES:`
			`with pytest.raises(ValueError):`
			`unique_labels(example)`

			`for y_type in [`
			`"unknown",`
			`"continuous",`
			`"continuous-multioutput",`
			`"multiclass-multioutput",`
			`]:`
			`for example in EXAMPLES[y_type]:`
			`with pytest.raises(ValueError):`
			`unique_labels(example)`


			`def test_unique_labels_mixed_types():`
			`# Mix with binary or multiclass and multilabel`
			`mix_clf_format = product(`
			`EXAMPLES["multilabel-indicator"], EXAMPLES["multiclass"] + EXAMPLES["binary"]`
			`)`

			`for y_multilabel, y_multiclass in mix_clf_format:`
			`with pytest.raises(ValueError):`
			`unique_labels(y_multiclass, y_multilabel)`
			`with pytest.raises(ValueError):`
			`unique_labels(y_multilabel, y_multiclass)`

			`with pytest.raises(ValueError):`
			`unique_labels([[1, 2]], [["a", "d"]])`

			`with pytest.raises(ValueError):`
			`unique_labels(["1", 2])`

			`with pytest.raises(ValueError):`
			`unique_labels([["1", 2], [1, 3]])`

			`with pytest.raises(ValueError):`
			`unique_labels([["1", "2"], [2, 3]])`


			def test_is_multilabel():
				"""Check is_multilabel on every example group, dense and sparse.

				Only the "multilabel-indicator" group is expected to be reported as
				multilabel. Examples that are already sparse, or that expose
				``__array__`` as a 2D boolean/integer/float array with at least one
				column, are additionally wrapped in every sparse container.
				"""
				all_sparse_containers = (
					COO_CONTAINERS
					+ CSC_CONTAINERS
					+ CSR_CONTAINERS
					+ DOK_CONTAINERS
					+ LIL_CONTAINERS
				)

				for group, group_examples in EXAMPLES.items():
					dense_exp = group == "multilabel-indicator"

					for example in group_examples:
						# A sparse input only counts as multilabel when the example
						# was explicitly defined as a sparse multilabel-indicator.
						sparse_exp = dense_exp and issparse(example)

						can_sparsify = issparse(example)
						if not can_sparsify and hasattr(example, "__array__"):
							as_array = np.asarray(example)
							can_sparsify = (
								as_array.ndim == 2
								and as_array.dtype.kind in "biuf"
								and as_array.shape[1] > 0
							)

						if can_sparsify:
							# Build every sparse variant up front, then check them.
							examples_sparse = [
								make_sparse(example) for make_sparse in all_sparse_containers
							]
							for exmpl_sparse in examples_sparse:
								assert sparse_exp == is_multilabel(
									exmpl_sparse
								), f"is_multilabel({exmpl_sparse!r}) should be {sparse_exp}"

						# The dense check always runs on a dense array.
						if issparse(example):
							example = example.toarray()

						assert dense_exp == is_multilabel(
							example
						), f"is_multilabel({example!r}) should be {dense_exp}"


			@pytest.mark.parametrize(
				"array_namespace, device, dtype_name",
				yield_namespace_device_dtype_combinations(),
			)
			def test_is_multilabel_array_api_compliance(array_namespace, device, dtype_name):
				"""Check is_multilabel on array API inputs with dispatch enabled.

				Floating-point examples are cast to the parametrized ``dtype_name``;
				other examples keep the dtype NumPy infers for them.
				"""
				xp = _array_api_for_tests(array_namespace, device)

				for group, group_examples in ARRAY_API_EXAMPLES.items():
					dense_exp = group == "multilabel-indicator"
					for raw_example in group_examples:
						np_example = np.asarray(raw_example)
						if np_example.dtype.kind == "f":
							np_example = np.asarray(raw_example, dtype=dtype_name)
						example = xp.asarray(np_example, device=device)

						with config_context(array_api_dispatch=True):
							assert dense_exp == is_multilabel(
								example
							), f"is_multilabel({example!r}) should be {dense_exp}"


			def test_check_classification_targets():
				"""Check which target types check_classification_targets accepts.

				Examples of the "unknown", "continuous" and "continuous-multioutput"
				groups must raise a ValueError; every other group must pass silently.
				"""
				rejected_types = ("unknown", "continuous", "continuous-multioutput")
				msg = "Unknown label type: "

				for y_type, examples in EXAMPLES.items():
					if y_type not in rejected_types:
						for example in examples:
							check_classification_targets(example)
						continue

					for example in examples:
						with pytest.raises(ValueError, match=msg):
							check_classification_targets(example)


			`# @ignore_warnings`
			def test_type_of_target():
				"""Check type_of_target on valid examples and on rejected inputs."""
				# Every example must be classified under the group it is listed in.
				for group, group_examples in EXAMPLES.items():
					for example in group_examples:
						detected = type_of_target(example)
						assert detected == group, "type_of_target(%r) should be %r, got %r" % (
							example,
							group,
							detected,
						)

				# Inputs that are not array-like are rejected.
				msg_regex = r"Expected array-like \(array or non-string sequence\).*"
				for example in NON_ARRAY_LIKE_EXAMPLES:
					with pytest.raises(ValueError, match=msg_regex):
						type_of_target(example)

				# The legacy sequence-of-sequences multilabel format is rejected.
				msg = (
					"You appear to be using a legacy multi-label data "
					"representation. Sequence of sequences are no longer supported;"
					" use a binary array or sparse matrix instead."
				)
				for example in MULTILABEL_SEQUENCES:
					with pytest.raises(ValueError, match=msg):
						type_of_target(example)


			def test_type_of_target_pandas_sparse():
				"""Check that type_of_target raises ValueError on a pandas SparseArray."""
				pd = pytest.importorskip("pandas")

				expected_error = "y cannot be class 'SparseSeries' or 'SparseArray'"
				sparse_target = pd.arrays.SparseArray([1, np.nan, np.nan, 1, np.nan])

				with pytest.raises(ValueError, match=expected_error):
					type_of_target(sparse_target)


			def test_type_of_target_pandas_nullable():
				"""Check type_of_target on pandas Series and DataFrames with nullable dtypes."""
				pd = pytest.importorskip("pandas")

				# (values, expected target type) checked for each nullable Series dtype.
				series_cases = (
					([1, 0, 2, 3, 4], "multiclass"),
					([1, 0, 1, 0], "binary"),
				)
				for dtype in ("Int32", "Float32"):
					for values, expected in series_cases:
						assert type_of_target(pd.Series(values, dtype=dtype)) == expected

				# (values, dtype, expected target type) for two-column DataFrames.
				frame_cases = (
					([[1.4, 3.1], [3.1, 1.4]], "Float32", "continuous-multioutput"),
					([[0, 1], [1, 1]], "Int32", "multilabel-indicator"),
					([[1, 2], [3, 1]], "Int32", "multiclass-multioutput"),
				)
				for values, dtype, expected in frame_cases:
					assert type_of_target(pd.DataFrame(values, dtype=dtype)) == expected


			@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
			def test_unique_labels_pandas_nullable(dtype):
				"""Check unique_labels on a pandas nullable Series paired with an int64 one.

				Non-regression test for gh-25634.
				"""
				pd = pytest.importorskip("pandas")

				nullable_target = pd.Series([1, 0, 0, 1, 0, 1, 1, 0, 1], dtype=dtype)
				int_prediction = pd.Series([0, 0, 1, 1, 0, 1, 1, 1, 1], dtype="int64")

				assert_array_equal(unique_labels(nullable_target, int_prediction), [0, 1])


			@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
			def test_class_distribution(csc_container):
				"""Check class_distribution on equivalent dense and sparse targets.

				The same 6x4 multi-output target is built as a dense array and as a CSC
				matrix. Both must yield the same classes, class counts and class priors,
				first without sample weights and then with explicit sample weights.
				"""
				y = np.array(
					[
						[1, 0, 0, 1],
						[2, 2, 0, 1],
						[1, 3, 0, 1],
						[4, 2, 0, 1],
						[2, 0, 0, 1],
						[1, 3, 0, 1],
					]
				)
				# Define the sparse matrix with a mix of implicit and explicit zeros
				data = np.array([1, 2, 1, 4, 2, 1, 0, 2, 3, 2, 3, 1, 1, 1, 1, 1, 1])
				indices = np.array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 5, 0, 1, 2, 3, 4, 5])
				indptr = np.array([0, 6, 11, 11, 17])
				y_sp = csc_container((data, indices, indptr), shape=(6, 4))

				classes_expected = [[1, 2, 4], [0, 2, 3], [0], [1]]
				n_classes_expected = [3, 3, 1, 1]

				def check_distribution(distribution, class_prior_expected):
					# Compare one class_distribution result, output by output.
					classes, n_classes, class_prior = distribution
					for k in range(y.shape[1]):
						assert_array_almost_equal(classes[k], classes_expected[k])
						assert_array_almost_equal(n_classes[k], n_classes_expected[k])
						assert_array_almost_equal(class_prior[k], class_prior_expected[k])

				# Unweighted: every sample counts once.
				class_prior_expected = [[3 / 6, 2 / 6, 1 / 6], [1 / 3, 1 / 3, 1 / 3], [1.0], [1.0]]
				check_distribution(class_distribution(y), class_prior_expected)
				check_distribution(class_distribution(y_sp), class_prior_expected)

				# Test again with explicit sample weights
				sample_weight = [1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
				class_prior_expected = [[4 / 9, 3 / 9, 2 / 9], [2 / 9, 4 / 9, 3 / 9], [1.0], [1.0]]
				check_distribution(class_distribution(y, sample_weight), class_prior_expected)
				# The sparse check must receive y_sp: passing the dense y again would
				# leave the weighted sparse code path untested.
				check_distribution(class_distribution(y_sp, sample_weight), class_prior_expected)


			def test_safe_split_with_precomputed_kernel():
				"""Check _safe_split with a precomputed kernel against the feature-based split.

				The kernel slices returned for the precomputed SVC must equal the Gram
				matrices computed from the feature slices returned for the plain SVC,
				and both estimators must receive the same target slices.
				"""
				feature_clf = SVC()
				kernel_clf = SVC(kernel="precomputed")

				iris = datasets.load_iris()
				X = iris.data
				y = iris.target
				K = X.dot(X.T)

				# Only the first shuffle split is needed.
				splitter = ShuffleSplit(test_size=0.25, random_state=0)
				train, test = next(splitter.split(X))

				# Training subset.
				X_train, y_train = _safe_split(feature_clf, X, y, train)
				K_train, y_train_kernel = _safe_split(kernel_clf, K, y, train)
				assert_array_almost_equal(K_train, X_train.dot(X_train.T))
				assert_array_almost_equal(y_train, y_train_kernel)

				# Test subset, with the training indices passed as the last argument.
				X_test, y_test = _safe_split(feature_clf, X, y, test, train)
				K_test, y_test_kernel = _safe_split(kernel_clf, K, y, test, train)
				assert_array_almost_equal(K_test, X_test.dot(X_train.T))
				assert_array_almost_equal(y_test, y_test_kernel)


			def test_ovr_decision_function():
				"""Check properties of the values returned by _ovr_decision_function."""
				n_classes = 3
				predictions = np.array([[0, 1, 1], [0, 1, 0], [0, 1, 1], [0, 1, 1]])
				confidences = np.array(
					[[-1e16, 0, -1e16], [1.0, 2.0, -3.0], [-5.0, 2.0, 5.0], [-0.5, 0.2, 0.5]]
				)

				dec_values = _ovr_decision_function(predictions, confidences, n_classes)

				# The decision values must stay within 0.5 of the vote counts.
				votes = np.array([[1, 0, 2], [1, 1, 1], [1, 0, 2], [1, 0, 2]])
				assert_allclose(votes, dec_values, atol=0.5)

				# The predicted class is the one with the most votes, or the highest
				# confidence on a tie. The second sample is a tie that class 1 should win.
				expected_prediction = np.array([2, 1, 2, 2])
				assert_array_equal(np.argmax(dec_values, axis=1), expected_prediction)

				# The third and fourth samples have the same votes, but the third has
				# the higher confidence, which must show in the decision values.
				assert dec_values[2, 2] > dec_values[3, 2]

				# Subset invariance: scoring each sample on its own must give the same
				# values as scoring the whole batch.
				dec_values_one = []
				for i in range(len(predictions)):
					single = _ovr_decision_function(
						predictions[i : i + 1], confidences[i : i + 1], n_classes
					)
					dec_values_one.append(single[0])

				assert_allclose(dec_values, dec_values_one, atol=1e-6)


			# TODO(1.7): Expect a ValueError instead of a FutureWarning once bytes labels raise an error.
			@pytest.mark.parametrize("input_type", ["list", "array"])
			def test_labels_in_bytes_format(input_type):
				"""Check that type_of_target emits a FutureWarning for bytes labels.

				Non-regression test for:
				https://github.com/scikit-learn/scikit-learn/issues/16980
				"""
				err_msg = (
					"Support for labels represented as bytes is deprecated in v1.5 and will"
					" error in v1.7. Convert the labels to a string or integer format."
				)
				bytes_target = _convert_container([b"a", b"b"], input_type)

				with pytest.warns(FutureWarning, match=err_msg):
					type_of_target(bytes_target)