Inzynierka_Gwiazdy/machine_learning/Lib/site-packages/sklearn/utils/multiclass.py

# Author: Arnaud Joly, Joel Nothman, Hamzeh Alsalhi
#
# License: BSD 3 clause
"""
Multi-class / multi-label utility function
==========================================

"""
from collections.abc import Sequence
from itertools import chain
import warnings

from scipy.sparse import issparse
from scipy.sparse import dok_matrix
from scipy.sparse import lil_matrix

import numpy as np

from .validation import check_array, _assert_all_finite
from ..utils._array_api import get_namespace


def _unique_multiclass(y):
    """Return the distinct labels of a binary/multiclass target.

    Inputs exposing ``__array__`` (or recognised as array API inputs by
    ``get_namespace``) are de-duplicated through the namespace's
    ``unique_values``; any other iterable is collapsed into a ``set``.
    """
    xp, is_array_api = get_namespace(y)
    array_backed = hasattr(y, "__array__") or is_array_api
    if not array_backed:
        return set(y)
    return xp.unique_values(xp.asarray(y))


def _unique_indicator(y):
    """Return ``arange(n_columns)`` for a label indicator matrix.

    An indicator matrix carries no explicit label values, so the column
    indices of the validated matrix are used as labels.
    """
    validated = check_array(y, input_name="y", accept_sparse=["csr", "csc", "coo"])
    n_columns = validated.shape[1]
    return np.arange(n_columns)


# Dispatch table used by `unique_labels`: maps a target type string (as
# returned by `type_of_target`) to the helper that extracts its labels.
# Target types missing from this table are rejected by `unique_labels`.
_FN_UNIQUE_LABELS = {
    "binary": _unique_multiclass,
    "multiclass": _unique_multiclass,
    "multilabel-indicator": _unique_indicator,
}


def unique_labels(*ys):
    """Extract an ordered array of unique labels.

    We don't allow:
        - mix of multilabel and multiclass (single label) targets
        - mix of label indicator matrix and anything else
          (because there are no explicit labels)
        - mix of label indicator matrices of different sizes
        - mix of string and integer labels

    At the moment, we also don't allow "multiclass-multioutput" input type.

    Parameters
    ----------
    *ys : array-likes
        Label values.

    Returns
    -------
    out : ndarray of shape (n_unique_labels,)
        An ordered array of unique labels.

    Raises
    ------
    ValueError
        If no argument is passed, if the inputs mix target types, if
        indicator matrices have different numbers of columns, if the target
        type has no label extractor, or if string and non-string labels are
        mixed.

    Examples
    --------
    >>> from sklearn.utils.multiclass import unique_labels
    >>> unique_labels([3, 5, 5, 5, 7, 7])
    array([3, 5, 7])
    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])
    array([1, 2, 3, 4])
    >>> unique_labels([1, 2, 10], [5, 11])
    array([ 1,  2,  5, 10, 11])
    """
    # Validate the argument count before anything consumes ``ys`` so that the
    # dedicated error below is always the one reported for an empty call.
    # NOTE(review): namespace detection is not guaranteed to accept an empty
    # argument list when array API dispatch is enabled - confirm.
    if not ys:
        raise ValueError("No argument has been passed.")
    xp, is_array_api = get_namespace(*ys)

    # Check that we don't mix label format
    ys_types = {type_of_target(x) for x in ys}
    if ys_types == {"binary", "multiclass"}:
        # "binary" is a special case of "multiclass": treat the mix as such.
        ys_types = {"multiclass"}

    if len(ys_types) > 1:
        raise ValueError("Mix type of y not allowed, got types %s" % ys_types)

    label_type = ys_types.pop()

    # Check consistency for the indicator format
    if (
        label_type == "multilabel-indicator"
        and len(
            {check_array(y, accept_sparse=["csr", "csc", "coo"]).shape[1] for y in ys}
        )
        > 1
    ):
        raise ValueError(
            "Multi-label binary indicator input with different numbers of labels"
        )

    # Get the unique set of labels
    _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
    if _unique_labels is None:
        raise ValueError("Unknown label type: %s" % repr(ys))

    if is_array_api:
        # array_api does not allow for mixed dtypes
        unique_ys = xp.concat([_unique_labels(y) for y in ys])
        return xp.unique_values(unique_ys)

    ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
    # Check that we don't mix string type with number type
    if len({isinstance(label, str) for label in ys_labels}) > 1:
        raise ValueError("Mix of label input types (string and number)")

    return xp.asarray(sorted(ys_labels))


def _is_integral_float(y):
    return y.dtype.kind == "f" and np.all(y.astype(int) == y)


def is_multilabel(y):
    """Check if ``y`` is in a multilabel format.

    ``y`` is considered multilabel when it is two-dimensional with more than
    one column and holds at most two distinct values of boolean, integer or
    integral-float type. For sparse input only the stored values are
    inspected: there must be none, exactly one distinct value, or two
    distinct values one of which is 0.

    Parameters
    ----------
    y : {array-like, sparse matrix}
        Target values.

    Returns
    -------
    out : bool
        Return ``True``, if ``y`` is in a multilabel format, else ``False``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.multiclass import is_multilabel
    >>> is_multilabel([0, 1, 0, 1])
    False
    >>> is_multilabel([[1], [0, 2], []])
    False
    >>> is_multilabel(np.array([[1, 0], [0, 0]]))
    True
    >>> is_multilabel(np.array([[1], [0], [0]]))
    False
    >>> is_multilabel(np.array([[1, 0, 0]]))
    True
    """
    xp, is_array_api = get_namespace(y)
    if hasattr(y, "__array__") or isinstance(y, Sequence) or is_array_api:
        # DeprecationWarning will be replaced by ValueError, see NEP 34
        # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html
        check_y_kwargs = dict(
            accept_sparse=True,
            allow_nd=True,
            force_all_finite=False,
            ensure_2d=False,
            ensure_min_samples=0,
            ensure_min_features=0,
        )
        with warnings.catch_warnings():
            # Escalate the ragged-array deprecation warning to an exception so
            # that it can be handled by the same ``except`` as the ValueError.
            warnings.simplefilter("error", np.VisibleDeprecationWarning)
            try:
                y = check_array(y, dtype=None, **check_y_kwargs)
            except (np.VisibleDeprecationWarning, ValueError) as e:
                # Complex input is a genuine error: propagate it unchanged.
                if str(e).startswith("Complex data not supported"):
                    raise

                # dtype=object should be provided explicitly for ragged arrays,
                # see NEP 34
                y = check_array(y, dtype=object, **check_y_kwargs)

    # Anything that is not 2-D with at least two columns cannot be an
    # indicator matrix.
    if not (hasattr(y, "shape") and y.ndim == 2 and y.shape[1] > 1):
        return False

    if issparse(y):
        # DOK/LIL matrices are converted to CSR before ``.data`` is read.
        # NOTE(review): presumably because their ``.data`` is not a flat array
        # of stored values - confirm against the scipy documentation.
        if isinstance(y, (dok_matrix, lil_matrix)):
            y = y.tocsr()
        labels = xp.unique_values(y.data)
        # ``and`` binds tighter than ``or``: an empty ``data`` array is enough
        # on its own; otherwise both the value-count and the dtype conditions
        # must hold.
        return (
            len(y.data) == 0
            or (labels.size == 1 or (labels.size == 2) and (0 in labels))
            and (y.dtype.kind in "biu" or _is_integral_float(labels))  # bool, int, uint
        )
    else:
        labels = xp.unique_values(y)

        # At most two distinct values, all of them discrete.
        return len(labels) < 3 and (
            y.dtype.kind in "biu" or _is_integral_float(labels)  # bool, int, uint
        )


def check_classification_targets(y):
    """Ensure that target y is of a non-regression type.

    The target type is computed with ``type_of_target`` and must be one of
    'binary', 'multiclass', 'multiclass-multioutput',
    'multilabel-indicator' or 'multilabel-sequences'.

    Parameters
    ----------
    y : array-like
        Target values.

    Raises
    ------
    ValueError
        If the inferred target type is not one of the accepted types.
    """
    accepted_types = (
        "binary",
        "multiclass",
        "multiclass-multioutput",
        "multilabel-indicator",
        "multilabel-sequences",
    )
    y_type = type_of_target(y, input_name="y")
    if y_type in accepted_types:
        return
    raise ValueError("Unknown label type: %r" % y_type)


def type_of_target(y, input_name=""):
    """Determine the type of data indicated by the target.

    Note that this type is the most specific type that can be inferred.
    For example:

        * ``binary`` is more specific but compatible with ``multiclass``.
        * ``multiclass`` of integers is more specific but compatible with
          ``continuous``.
        * ``multilabel-indicator`` is more specific but compatible with
          ``multiclass-multioutput``.

    Parameters
    ----------
    y : {array-like, sparse matrix}
        Target values. If a sparse matrix, `y` is expected to be a
        CSR/CSC matrix.

    input_name : str, default=""
        The data name used to construct the error message.

        .. versionadded:: 1.1.0

    Returns
    -------
    target_type : str
        One of:

        * 'continuous': `y` is an array-like of floats that are not all
          integers, and is 1d or a column vector.
        * 'continuous-multioutput': `y` is a 2d array of floats that are
          not all integers, and both dimensions are of size > 1.
        * 'binary': `y` contains <= 2 discrete values and is 1d or a column
          vector.
        * 'multiclass': `y` contains more than two discrete values, is not a
          sequence of sequences, and is 1d or a column vector.
        * 'multiclass-multioutput': `y` is a 2d array that contains more
          than two discrete values, is not a sequence of sequences, and both
          dimensions are of size > 1.
        * 'multilabel-indicator': `y` is a label indicator matrix, an array
          of two dimensions with at least two columns, and at most 2 unique
          values.
        * 'unknown': `y` is array-like but none of the above, such as a 3d
          array, sequence of sequences, or an array of non-sequence objects.

    Raises
    ------
    ValueError
        If `y` is not array-like (or is a string), if `y` is a
        ``SparseSeries`` or ``SparseArray``, or if `y` uses the legacy
        sequence-of-sequences multilabel representation. Validation errors
        whose message starts with "Complex data not supported" are
        propagated unchanged.

    Examples
    --------
    >>> from sklearn.utils.multiclass import type_of_target
    >>> import numpy as np
    >>> type_of_target([0.1, 0.6])
    'continuous'
    >>> type_of_target([1, -1, -1, 1])
    'binary'
    >>> type_of_target(['a', 'b', 'a'])
    'binary'
    >>> type_of_target([1.0, 2.0])
    'binary'
    >>> type_of_target([1, 0, 2])
    'multiclass'
    >>> type_of_target([1.0, 0.0, 3.0])
    'multiclass'
    >>> type_of_target(['a', 'b', 'c'])
    'multiclass'
    >>> type_of_target(np.array([[1, 2], [3, 1]]))
    'multiclass-multioutput'
    >>> type_of_target([[1, 2]])
    'multilabel-indicator'
    >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))
    'continuous-multioutput'
    >>> type_of_target(np.array([[0, 1], [1, 1]]))
    'multilabel-indicator'
    """
    xp, is_array_api = get_namespace(y)
    # ``and`` binds tighter than ``or``: array API inputs are always valid,
    # other inputs must be a non-string sequence, sparse matrix or expose
    # ``__array__``.
    valid = (
        (isinstance(y, Sequence) or issparse(y) or hasattr(y, "__array__"))
        and not isinstance(y, str)
        or is_array_api
    )

    if not valid:
        raise ValueError(
            "Expected array-like (array or non-string sequence), got %r" % y
        )

    # Detected by class name, so no pandas import is needed here.
    sparse_pandas = y.__class__.__name__ in ["SparseSeries", "SparseArray"]
    if sparse_pandas:
        raise ValueError("y cannot be class 'SparseSeries' or 'SparseArray'")

    # The most specific 2-D type is checked first.
    if is_multilabel(y):
        return "multilabel-indicator"

    # DeprecationWarning will be replaced by ValueError, see NEP 34
    # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html
    # We therefore catch both deprecation (NumPy < 1.24) warning and
    # value error (NumPy >= 1.24).
    check_y_kwargs = dict(
        accept_sparse=True,
        allow_nd=True,
        force_all_finite=False,
        ensure_2d=False,
        ensure_min_samples=0,
        ensure_min_features=0,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("error", np.VisibleDeprecationWarning)
        # Sparse input is left as is; only dense input is converted.
        if not issparse(y):
            try:
                y = check_array(y, dtype=None, **check_y_kwargs)
            except (np.VisibleDeprecationWarning, ValueError) as e:
                if str(e).startswith("Complex data not supported"):
                    raise

                # dtype=object should be provided explicitly for ragged arrays,
                # see NEP 34
                y = check_array(y, dtype=object, **check_y_kwargs)

    # The old sequence of sequences format
    try:
        if (
            not hasattr(y[0], "__array__")
            and isinstance(y[0], Sequence)
            and not isinstance(y[0], str)
        ):
            raise ValueError(
                "You appear to be using a legacy multi-label data"
                " representation. Sequence of sequences are no"
                " longer supported; use a binary array or sparse"
                " matrix instead - the MultiLabelBinarizer"
                " transformer can convert to this format."
            )
    except IndexError:
        # ``y[0]`` does not exist (empty input): nothing to check here, the
        # empty case is classified below.
        pass

    # Invalid inputs
    if y.ndim not in (1, 2):
        # Number of dimension greater than 2: [[[1, 2]]]
        return "unknown"
    if not min(y.shape):
        # Empty ndarray: []/[[]]
        if y.ndim == 1:
            # 1-D empty array: []
            return "binary"  # []
        # 2-D empty array: [[]]
        return "unknown"
    if not issparse(y) and y.dtype == object and not isinstance(y.flat[0], str):
        # [obj_1] and not ["label_1"]
        return "unknown"

    # Check if multioutput
    if y.ndim == 2 and y.shape[1] > 1:
        suffix = "-multioutput"  # [[1, 2], [1, 2]]
    else:
        suffix = ""  # [1, 2, 3] or [[1], [2], [3]]

    # Check float and contains non-integer float values
    if y.dtype.kind == "f":
        # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]
        # For sparse input only the stored values are inspected.
        data = y.data if issparse(y) else y
        if xp.any(data != data.astype(int)):
            # Finiteness is only checked once a non-integer value was found,
            # i.e. right before the target is declared continuous.
            _assert_all_finite(data, input_name=input_name)
            return "continuous" + suffix

    # Check multiclass
    first_row = y[0] if not issparse(y) else y.getrow(0).data
    # More than two distinct values, or a 2-D target whose first row has
    # several entries (which was not recognised as multilabel above).
    if xp.unique_values(y).shape[0] > 2 or (y.ndim == 2 and len(first_row) > 1):
        # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]
        return "multiclass" + suffix
    else:
        return "binary"  # [1, 2] or [["a"], ["b"]]


def _check_partial_fit_first_call(clf, classes=None):
    """Private helper function for factorizing common classes param logic.

    Estimators that implement the ``partial_fit`` API need to be provided with
    the list of possible classes at the first call to partial_fit.

    Subsequent calls to partial_fit should check that ``classes`` is still
    consistent with a previous value of ``clf.classes_`` when provided.

    This function returns True if it detects that this was the first call to
    ``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
    set on ``clf``.

    """
    if getattr(clf, "classes_", None) is None and classes is None:
        raise ValueError("classes must be passed on the first call to partial_fit.")

    elif classes is not None:
        if getattr(clf, "classes_", None) is not None:
            if not np.array_equal(clf.classes_, unique_labels(classes)):
                raise ValueError(
                    "`classes=%r` is not the same as on last call "
                    "to partial_fit, was: %r" % (classes, clf.classes_)
                )

        else:
            # This is the first call to partial_fit
            clf.classes_ = unique_labels(classes)
            return True

    # classes is None and clf.classes_ has already previously been set:
    # nothing to do
    return False


def class_distribution(y, sample_weight=None):
    """Compute class priors from multioutput-multiclass target data.

    Parameters
    ----------
    y : {array-like, sparse matrix} of size (n_samples, n_outputs)
        The labels for each example. Must be two-dimensional. Dense input
        that is not already an array (e.g. nested lists) is converted with
        ``np.asarray``.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    classes : list of size n_outputs of ndarray of size (n_classes,)
        List of classes for each column.

    n_classes : list of int of size n_outputs
        Number of classes in each column.

    class_prior : list of size n_outputs of ndarray of size (n_classes,)
        Class distribution of each column.
    """
    classes = []
    n_classes = []
    class_prior = []

    if not issparse(y):
        # The dense path needs ``.shape`` and ``y[:, k]`` indexing, which
        # plain array-likes such as nested lists do not provide.
        y = np.asarray(y)

    n_samples, n_outputs = y.shape
    total_weight = None
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)
        # Identical for every column: compute it once instead of per output.
        total_weight = np.sum(sample_weight)

    if issparse(y):
        y = y.tocsc()
        # Number of stored entries per column.
        y_nnz = np.diff(y.indptr)

        for k in range(n_outputs):
            col = slice(y.indptr[k], y.indptr[k + 1])
            col_nonzero = y.indices[col]
            # separate sample weights for zero and non-zero elements
            if sample_weight is not None:
                nz_samp_weight = sample_weight[col_nonzero]
                zeros_samp_weight_sum = total_weight - np.sum(nz_samp_weight)
            else:
                nz_samp_weight = None
                zeros_samp_weight_sum = n_samples - y_nnz[k]

            classes_k, y_k = np.unique(y.data[col], return_inverse=True)
            class_prior_k = np.bincount(y_k, weights=nz_samp_weight)

            # An explicit zero was found, combine its weight with the weight
            # of the implicit zeros
            if 0 in classes_k:
                class_prior_k[classes_k == 0] += zeros_samp_weight_sum

            # If there is an implicit zero and it is not in classes and
            # class_prior, make an entry for it
            if 0 not in classes_k and y_nnz[k] < n_samples:
                classes_k = np.insert(classes_k, 0, 0)
                class_prior_k = np.insert(class_prior_k, 0, zeros_samp_weight_sum)

            classes.append(classes_k)
            n_classes.append(classes_k.shape[0])
            class_prior.append(class_prior_k / class_prior_k.sum())
    else:
        for k in range(n_outputs):
            classes_k, y_k = np.unique(y[:, k], return_inverse=True)
            classes.append(classes_k)
            n_classes.append(classes_k.shape[0])
            class_prior_k = np.bincount(y_k, weights=sample_weight)
            class_prior.append(class_prior_k / class_prior_k.sum())

    return (classes, n_classes, class_prior)


def _ovr_decision_function(predictions, confidences, n_classes):
    """Compute a continuous, tie-breaking OvR decision function from OvO.

    It is important to include a continuous value, not only votes,
    to make computing AUC or calibration meaningful.

    Parameters
    ----------
    predictions : array-like of shape (n_samples, n_classifiers)
        Predicted classes for each binary classifier.

    confidences : array-like of shape (n_samples, n_classifiers)
        Decision functions or predicted probabilities for positive class
        for each binary classifier.

    n_classes : int
        Number of classes. n_classifiers must be
        ``n_classes * (n_classes - 1 ) / 2``.
    """
    n_samples = predictions.shape[0]
    votes = np.zeros((n_samples, n_classes))
    sum_of_confidences = np.zeros((n_samples, n_classes))

    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            sum_of_confidences[:, i] -= confidences[:, k]
            sum_of_confidences[:, j] += confidences[:, k]
            votes[predictions[:, k] == 0, i] += 1
            votes[predictions[:, k] == 1, j] += 1
            k += 1

    # Monotonically transform the sum_of_confidences to (-1/3, 1/3)
    # and add it with votes. The monotonic transformation  is
    # f: x -> x / (3 * (|x| + 1)), it uses 1/3 instead of 1/2
    # to ensure that we won't reach the limits and change vote order.
    # The motivation is to use confidence levels as a way to break ties in
    # the votes without switching any decision made based on a difference
    # of 1 vote.
    transformed_confidences = sum_of_confidences / (
        3 * (np.abs(sum_of_confidences) + 1)
    )
    return votes + transformed_confidences
Machine learning 2023-09-20 19:46:58 +02:00			`# Author: Arnaud Joly, Joel Nothman, Hamzeh Alsalhi`
			`#`
			`# License: BSD 3 clause`
			`"""`
			`Multi-class / multi-label utility function`
			`==========================================`

			`"""`
			`from collections.abc import Sequence`
			`from itertools import chain`
			`import warnings`

			`from scipy.sparse import issparse`
			`from scipy.sparse import dok_matrix`
			`from scipy.sparse import lil_matrix`

			`import numpy as np`

			`from .validation import check_array, _assert_all_finite`
			`from ..utils._array_api import get_namespace`


			`def _unique_multiclass(y):`
			`xp, is_array_api = get_namespace(y)`
			`if hasattr(y, "__array__") or is_array_api:`
			`return xp.unique_values(xp.asarray(y))`
			`else:`
			`return set(y)`


			`def _unique_indicator(y):`
			`return np.arange(`
			`check_array(y, input_name="y", accept_sparse=["csr", "csc", "coo"]).shape[1]`
			`)`


			`_FN_UNIQUE_LABELS = {`
			`"binary": _unique_multiclass,`
			`"multiclass": _unique_multiclass,`
			`"multilabel-indicator": _unique_indicator,`
			`}`


			`def unique_labels(*ys):`
			`"""Extract an ordered array of unique labels.`

			`We don't allow:`
			`- mix of multilabel and multiclass (single label) targets`
			`- mix of label indicator matrix and anything else,`
			`because there are no explicit labels)`
			`- mix of label indicator matrices of different sizes`
			`- mix of string and integer labels`

			`At the moment, we also don't allow "multiclass-multioutput" input type.`

			`Parameters`
			`----------`
			`*ys : array-likes`
			`Label values.`

			`Returns`
			`-------`
			`out : ndarray of shape (n_unique_labels,)`
			`An ordered array of unique labels.`

			`Examples`
			`--------`
			`>>> from sklearn.utils.multiclass import unique_labels`
			`>>> unique_labels([3, 5, 5, 5, 7, 7])`
			`array([3, 5, 7])`
			`>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])`
			`array([1, 2, 3, 4])`
			`>>> unique_labels([1, 2, 10], [5, 11])`
			`array([ 1, 2, 5, 10, 11])`
			`"""`
			`xp, is_array_api = get_namespace(*ys)`
			`if not ys:`
			`raise ValueError("No argument has been passed.")`
			`# Check that we don't mix label format`

			`ys_types = set(type_of_target(x) for x in ys)`
			`if ys_types == {"binary", "multiclass"}:`
			`ys_types = {"multiclass"}`

			`if len(ys_types) > 1:`
			`raise ValueError("Mix type of y not allowed, got types %s" % ys_types)`

			`label_type = ys_types.pop()`

			`# Check consistency for the indicator format`
			`if (`
			`label_type == "multilabel-indicator"`
			`and len(`
			`set(`
			`check_array(y, accept_sparse=["csr", "csc", "coo"]).shape[1] for y in ys`
			`)`
			`)`
			`> 1`
			`):`
			`raise ValueError(`
			`"Multi-label binary indicator input with different numbers of labels"`
			`)`

			`# Get the unique set of labels`
			`_unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)`
			`if not _unique_labels:`
			`raise ValueError("Unknown label type: %s" % repr(ys))`

			`if is_array_api:`
			`# array_api does not allow for mixed dtypes`
			`unique_ys = xp.concat([_unique_labels(y) for y in ys])`
			`return xp.unique_values(unique_ys)`

			`ys_labels = set(chain.from_iterable((i for i in _unique_labels(y)) for y in ys))`
			`# Check that we don't mix string type with number type`
			`if len(set(isinstance(label, str) for label in ys_labels)) > 1:`
			`raise ValueError("Mix of label input types (string and number)")`

			`return xp.asarray(sorted(ys_labels))`


			`def _is_integral_float(y):`
			`return y.dtype.kind == "f" and np.all(y.astype(int) == y)`


			`def is_multilabel(y):`
			"""Check if ``y`` is in a multilabel format.

			`Parameters`
			`----------`
			`y : ndarray of shape (n_samples,)`
			`Target values.`

			`Returns`
			`-------`
			`out : bool`
			Return ``True``, if ``y`` is in a multilabel format, else ```False``.

			`Examples`
			`--------`
			`>>> import numpy as np`
			`>>> from sklearn.utils.multiclass import is_multilabel`
			`>>> is_multilabel([0, 1, 0, 1])`
			`False`
			`>>> is_multilabel([[1], [0, 2], []])`
			`False`
			`>>> is_multilabel(np.array([[1, 0], [0, 0]]))`
			`True`
			`>>> is_multilabel(np.array([[1], [0], [0]]))`
			`False`
			`>>> is_multilabel(np.array([[1, 0, 0]]))`
			`True`
			`"""`
			`xp, is_array_api = get_namespace(y)`
			`if hasattr(y, "__array__") or isinstance(y, Sequence) or is_array_api:`
			`# DeprecationWarning will be replaced by ValueError, see NEP 34`
			`# https://numpy.org/neps/nep-0034-infer-dtype-is-object.html`
			`check_y_kwargs = dict(`
			`accept_sparse=True,`
			`allow_nd=True,`
			`force_all_finite=False,`
			`ensure_2d=False,`
			`ensure_min_samples=0,`
			`ensure_min_features=0,`
			`)`
			`with warnings.catch_warnings():`
			`warnings.simplefilter("error", np.VisibleDeprecationWarning)`
			`try:`
			`y = check_array(y, dtype=None, **check_y_kwargs)`
			`except (np.VisibleDeprecationWarning, ValueError) as e:`
			`if str(e).startswith("Complex data not supported"):`
			`raise`

			`# dtype=object should be provided explicitly for ragged arrays,`
			`# see NEP 34`
			`y = check_array(y, dtype=object, **check_y_kwargs)`

			`if not (hasattr(y, "shape") and y.ndim == 2 and y.shape[1] > 1):`
			`return False`

			`if issparse(y):`
			`if isinstance(y, (dok_matrix, lil_matrix)):`
			`y = y.tocsr()`
			`labels = xp.unique_values(y.data)`
			`return (`
			`len(y.data) == 0`
			`or (labels.size == 1 or (labels.size == 2) and (0 in labels))`
			`and (y.dtype.kind in "biu" or _is_integral_float(labels)) # bool, int, uint`
			`)`
			`else:`
			`labels = xp.unique_values(y)`

			`return len(labels) < 3 and (`
			`y.dtype.kind in "biu" or _is_integral_float(labels) # bool, int, uint`
			`)`


			`def check_classification_targets(y):`
			`"""Ensure that target y is of a non-regression type.`

			`Only the following target types (as defined in type_of_target) are allowed:`
			`'binary', 'multiclass', 'multiclass-multioutput',`
			`'multilabel-indicator', 'multilabel-sequences'`

			`Parameters`
			`----------`
			`y : array-like`
			`Target values.`
			`"""`
			`y_type = type_of_target(y, input_name="y")`
			`if y_type not in [`
			`"binary",`
			`"multiclass",`
			`"multiclass-multioutput",`
			`"multilabel-indicator",`
			`"multilabel-sequences",`
			`]:`
			`raise ValueError("Unknown label type: %r" % y_type)`


			`def type_of_target(y, input_name=""):`
			`"""Determine the type of data indicated by the target.`

			`Note that this type is the most specific type that can be inferred.`
			`For example:`

			* ``binary`` is more specific but compatible with ``multiclass``.
			* ``multiclass`` of integers is more specific but compatible with
			``continuous``.
			* ``multilabel-indicator`` is more specific but compatible with
			``multiclass-multioutput``.

			`Parameters`
			`----------`
			`y : {array-like, sparse matrix}`
			Target values. If a sparse matrix, `y` is expected to be a
			`CSR/CSC matrix.`

			`input_name : str, default=""`
			`The data name used to construct the error message.`

			`.. versionadded:: 1.1.0`

			`Returns`
			`-------`
			`target_type : str`
			`One of:`

			* 'continuous': `y` is an array-like of floats that are not all
			`integers, and is 1d or a column vector.`
			* 'continuous-multioutput': `y` is a 2d array of floats that are
			`not all integers, and both dimensions are of size > 1.`
			* 'binary': `y` contains <= 2 discrete values and is 1d or a column
			`vector.`
			* 'multiclass': `y` contains more than two discrete values, is not a
			`sequence of sequences, and is 1d or a column vector.`
			* 'multiclass-multioutput': `y` is a 2d array that contains more
			`than two discrete values, is not a sequence of sequences, and both`
			`dimensions are of size > 1.`
			* 'multilabel-indicator': `y` is a label indicator matrix, an array
			`of two dimensions with at least two columns, and at most 2 unique`
			`values.`
			* 'unknown': `y` is array-like but none of the above, such as a 3d
			`array, sequence of sequences, or an array of non-sequence objects.`

			`Examples`
			`--------`
			`>>> from sklearn.utils.multiclass import type_of_target`
			`>>> import numpy as np`
			`>>> type_of_target([0.1, 0.6])`
			`'continuous'`
			`>>> type_of_target([1, -1, -1, 1])`
			`'binary'`
			`>>> type_of_target(['a', 'b', 'a'])`
			`'binary'`
			`>>> type_of_target([1.0, 2.0])`
			`'binary'`
			`>>> type_of_target([1, 0, 2])`
			`'multiclass'`
			`>>> type_of_target([1.0, 0.0, 3.0])`
			`'multiclass'`
			`>>> type_of_target(['a', 'b', 'c'])`
			`'multiclass'`
			`>>> type_of_target(np.array([[1, 2], [3, 1]]))`
			`'multiclass-multioutput'`
			`>>> type_of_target([[1, 2]])`
			`'multilabel-indicator'`
			`>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))`
			`'continuous-multioutput'`
			`>>> type_of_target(np.array([[0, 1], [1, 1]]))`
			`'multilabel-indicator'`
			`"""`
			`xp, is_array_api = get_namespace(y)`
			`valid = (`
			`(isinstance(y, Sequence) or issparse(y) or hasattr(y, "__array__"))`
			`and not isinstance(y, str)`
			`or is_array_api`
			`)`

			`if not valid:`
			`raise ValueError(`
			`"Expected array-like (array or non-string sequence), got %r" % y`
			`)`

			`sparse_pandas = y.__class__.__name__ in ["SparseSeries", "SparseArray"]`
			`if sparse_pandas:`
			`raise ValueError("y cannot be class 'SparseSeries' or 'SparseArray'")`

			`if is_multilabel(y):`
			`return "multilabel-indicator"`

			`# DeprecationWarning will be replaced by ValueError, see NEP 34`
			`# https://numpy.org/neps/nep-0034-infer-dtype-is-object.html`
			`# We therefore catch both deprecation (NumPy < 1.24) warning and`
			`# value error (NumPy >= 1.24).`
			`check_y_kwargs = dict(`
			`accept_sparse=True,`
			`allow_nd=True,`
			`force_all_finite=False,`
			`ensure_2d=False,`
			`ensure_min_samples=0,`
			`ensure_min_features=0,`
			`)`

			`with warnings.catch_warnings():`
			`warnings.simplefilter("error", np.VisibleDeprecationWarning)`
			`if not issparse(y):`
			`try:`
			`y = check_array(y, dtype=None, **check_y_kwargs)`
			`except (np.VisibleDeprecationWarning, ValueError) as e:`
			`if str(e).startswith("Complex data not supported"):`
			`raise`

			`# dtype=object should be provided explicitly for ragged arrays,`
			`# see NEP 34`
			`y = check_array(y, dtype=object, **check_y_kwargs)`

			`# The old sequence of sequences format`
			`try:`
			`if (`
			`not hasattr(y[0], "__array__")`
			`and isinstance(y[0], Sequence)`
			`and not isinstance(y[0], str)`
			`):`
			`raise ValueError(`
			`"You appear to be using a legacy multi-label data"`
			`" representation. Sequence of sequences are no"`
			`" longer supported; use a binary array or sparse"`
			`" matrix instead - the MultiLabelBinarizer"`
			`" transformer can convert to this format."`
			`)`
			`except IndexError:`
			`pass`

			`# Invalid inputs`
			`if y.ndim not in (1, 2):`
			`# Number of dimension greater than 2: [[[1, 2]]]`
			`return "unknown"`
			`if not min(y.shape):`
			`# Empty ndarray: []/[[]]`
			`if y.ndim == 1:`
			`# 1-D empty array: []`
			`return "binary" # []`
			`# 2-D empty array: [[]]`
			`return "unknown"`
			`if not issparse(y) and y.dtype == object and not isinstance(y.flat[0], str):`
			`# [obj_1] and not ["label_1"]`
			`return "unknown"`

			`# Check if multioutput`
			`if y.ndim == 2 and y.shape[1] > 1:`
			`suffix = "-multioutput" # [[1, 2], [1, 2]]`
			`else:`
			`suffix = "" # [1, 2, 3] or [[1], [2], [3]]`

			`# Check float and contains non-integer float values`
			`if y.dtype.kind == "f":`
			`# [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]`
			`data = y.data if issparse(y) else y`
			`if xp.any(data != data.astype(int)):`
			`_assert_all_finite(data, input_name=input_name)`
			`return "continuous" + suffix`

			`# Check multiclass`
			`first_row = y[0] if not issparse(y) else y.getrow(0).data`
			`if xp.unique_values(y).shape[0] > 2 or (y.ndim == 2 and len(first_row) > 1):`
			`# [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]`
			`return "multiclass" + suffix`
			`else:`
			`return "binary" # [1, 2] or [["a"], ["b"]]`


			`def _check_partial_fit_first_call(clf, classes=None):`
			`"""Private helper function for factorizing common classes param logic.`

			Estimators that implement the ``partial_fit`` API need to be provided with
			`the list of possible classes at the first call to partial_fit.`

			Subsequent calls to partial_fit should check that ``classes`` is still
			consistent with a previous value of ``clf.classes_`` when provided.

			`This function returns True if it detects that this was the first call to`
			``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
			set on ``clf``.

			`"""`
			`if getattr(clf, "classes_", None) is None and classes is None:`
			`raise ValueError("classes must be passed on the first call to partial_fit.")`

			`elif classes is not None:`
			`if getattr(clf, "classes_", None) is not None:`
			`if not np.array_equal(clf.classes_, unique_labels(classes)):`
			`raise ValueError(`
			"`classes=%r` is not the same as on last call "
			`"to partial_fit, was: %r" % (classes, clf.classes_)`
			`)`

			`else:`
			`# This is the first call to partial_fit`
			`clf.classes_ = unique_labels(classes)`
			`return True`

			`# classes is None and clf.classes_ has already previously been set:`
			`# nothing to do`
			`return False`


			`def class_distribution(y, sample_weight=None):`
			`"""Compute class priors from multioutput-multiclass target data.`

			`Parameters`
			`----------`
			`y : {array-like, sparse matrix} of size (n_samples, n_outputs)`
			`The labels for each example.`

			`sample_weight : array-like of shape (n_samples,), default=None`
			`Sample weights.`

			`Returns`
			`-------`
			`classes : list of size n_outputs of ndarray of size (n_classes,)`
			`List of classes for each column.`

			`n_classes : list of int of size n_outputs`
			`Number of classes in each column.`

			`class_prior : list of size n_outputs of ndarray of size (n_classes,)`
			`Class distribution of each column.`
			`"""`
			`classes = []`
			`n_classes = []`
			`class_prior = []`

			`n_samples, n_outputs = y.shape`
			`if sample_weight is not None:`
			`sample_weight = np.asarray(sample_weight)`

			`if issparse(y):`
			`y = y.tocsc()`
			`y_nnz = np.diff(y.indptr)`

			`for k in range(n_outputs):`
			`col_nonzero = y.indices[y.indptr[k] : y.indptr[k + 1]]`
			`# separate sample weights for zero and non-zero elements`
			`if sample_weight is not None:`
			`nz_samp_weight = sample_weight[col_nonzero]`
			`zeros_samp_weight_sum = np.sum(sample_weight) - np.sum(nz_samp_weight)`
			`else:`
			`nz_samp_weight = None`
			`zeros_samp_weight_sum = y.shape[0] - y_nnz[k]`

			`classes_k, y_k = np.unique(`
			`y.data[y.indptr[k] : y.indptr[k + 1]], return_inverse=True`
			`)`
			`class_prior_k = np.bincount(y_k, weights=nz_samp_weight)`

			`# An explicit zero was found, combine its weight with the weight`
			`# of the implicit zeros`
			`if 0 in classes_k:`
			`class_prior_k[classes_k == 0] += zeros_samp_weight_sum`

			`# If an there is an implicit zero and it is not in classes and`
			`# class_prior, make an entry for it`
			`if 0 not in classes_k and y_nnz[k] < y.shape[0]:`
			`classes_k = np.insert(classes_k, 0, 0)`
			`class_prior_k = np.insert(class_prior_k, 0, zeros_samp_weight_sum)`

			`classes.append(classes_k)`
			`n_classes.append(classes_k.shape[0])`
			`class_prior.append(class_prior_k / class_prior_k.sum())`
			`else:`
			`for k in range(n_outputs):`
			`classes_k, y_k = np.unique(y[:, k], return_inverse=True)`
			`classes.append(classes_k)`
			`n_classes.append(classes_k.shape[0])`
			`class_prior_k = np.bincount(y_k, weights=sample_weight)`
			`class_prior.append(class_prior_k / class_prior_k.sum())`

			`return (classes, n_classes, class_prior)`


			`def _ovr_decision_function(predictions, confidences, n_classes):`
			`"""Compute a continuous, tie-breaking OvR decision function from OvO.`

			`It is important to include a continuous value, not only votes,`
			`to make computing AUC or calibration meaningful.`

			`Parameters`
			`----------`
			`predictions : array-like of shape (n_samples, n_classifiers)`
			`Predicted classes for each binary classifier.`

			`confidences : array-like of shape (n_samples, n_classifiers)`
			`Decision functions or predicted probabilities for positive class`
			`for each binary classifier.`

			`n_classes : int`
			`Number of classes. n_classifiers must be`
			``n_classes * (n_classes - 1 ) / 2``.
			`"""`
			`n_samples = predictions.shape[0]`
			`votes = np.zeros((n_samples, n_classes))`
			`sum_of_confidences = np.zeros((n_samples, n_classes))`

			`k = 0`
			`for i in range(n_classes):`
			`for j in range(i + 1, n_classes):`
			`sum_of_confidences[:, i] -= confidences[:, k]`
			`sum_of_confidences[:, j] += confidences[:, k]`
			`votes[predictions[:, k] == 0, i] += 1`
			`votes[predictions[:, k] == 1, j] += 1`
			`k += 1`

			`# Monotonically transform the sum_of_confidences to (-1/3, 1/3)`
			`# and add it with votes. The monotonic transformation is`
			`# f: x -> x / (3 * (\|x\| + 1)), it uses 1/3 instead of 1/2`
			`# to ensure that we won't reach the limits and change vote order.`
			`# The motivation is to use confidence levels as a way to break ties in`
			`# the votes without switching any decision made based on a difference`
			`# of 1 vote.`
			`transformed_confidences = sum_of_confidences / (`
			`3 * (np.abs(sum_of_confidences) + 1)`
			`)`
			`return votes + transformed_confidences`