"""
Common code for all metrics.

"""
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#          Mathieu Blondel <mathieu@mblondel.org>
#          Olivier Grisel <olivier.grisel@ensta.org>
#          Arnaud Joly <a.joly@ulg.ac.be>
#          Jochen Wersdorfer <jochen@wersdoerfer.de>
#          Lars Buitinck
#          Joel Nothman <joel.nothman@gmail.com>
#          Noel Dawe <noel@dawe.me>
# License: BSD 3 clause

from itertools import combinations

import numpy as np

from ..utils import check_array, check_consistent_length
from ..utils.multiclass import type_of_target


def _average_binary_score(binary_metric, y_true, y_score, average,
                          sample_weight=None):
    """Average a binary metric for multilabel classification.

    Parameters
    ----------
    y_true : array, shape = [n_samples] or [n_samples, n_classes]
        True binary labels in binary label indicators.

    y_score : array, shape = [n_samples] or [n_samples, n_classes]
        Target scores, can be probability estimates of the positive
        class, confidence values, or binary decisions.

    average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
        If ``None``, the scores for each class are returned. Otherwise,
        this determines the type of averaging performed on the data:

        ``'micro'``:
            Calculate metrics globally by considering each element of the label
            indicator matrix as a label.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label).
        ``'samples'``:
            Calculate metrics for each instance, and find their average.

        Will be ignored when ``y_true`` is binary.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    binary_metric : callable, returns shape [n_classes]
        The binary metric function to use.

    Returns
    -------
    score : float or array of shape [n_classes]
        If ``average`` is not ``None``, the averaged score is returned;
        otherwise, the score for each class is returned.

    """
    average_options = (None, 'micro', 'macro', 'weighted', 'samples')
    if average not in average_options:
        raise ValueError('average has to be one of {0}'
                         ''.format(average_options))

    y_type = type_of_target(y_true)
    if y_type not in ("binary", "multilabel-indicator"):
        raise ValueError("{0} format is not supported".format(y_type))

    if y_type == "binary":
        return binary_metric(y_true, y_score, sample_weight=sample_weight)

    check_consistent_length(y_true, y_score, sample_weight)
    y_true = check_array(y_true)
    y_score = check_array(y_score)

    not_average_axis = 1
    score_weight = sample_weight
    average_weight = None

    if average == "micro":
        if score_weight is not None:
            score_weight = np.repeat(score_weight, y_true.shape[1])
        y_true = y_true.ravel()
        y_score = y_score.ravel()

    elif average == 'weighted':
        if score_weight is not None:
            average_weight = np.sum(np.multiply(
                y_true, np.reshape(score_weight, (-1, 1))), axis=0)
        else:
            average_weight = np.sum(y_true, axis=0)
        if np.isclose(average_weight.sum(), 0.0):
            return 0

    elif average == 'samples':
        # swap average_weight <-> score_weight
        average_weight = score_weight
        score_weight = None
        not_average_axis = 0

    if y_true.ndim == 1:
        y_true = y_true.reshape((-1, 1))

    if y_score.ndim == 1:
        y_score = y_score.reshape((-1, 1))

    n_classes = y_score.shape[not_average_axis]
    score = np.zeros((n_classes,))
    for c in range(n_classes):
        y_true_c = y_true.take([c], axis=not_average_axis).ravel()
        y_score_c = y_score.take([c], axis=not_average_axis).ravel()
        score[c] = binary_metric(y_true_c, y_score_c,
                                 sample_weight=score_weight)

    # Average the results
    if average is not None:
        if average_weight is not None:
            # Scores with 0 weights are forced to be 0, preventing the average
            # score from being affected by 0-weighted NaN elements.
            average_weight = np.asarray(average_weight)
            score[average_weight == 0] = 0
        return np.average(score, weights=average_weight)
    else:
        return score
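
# A minimal, hedged usage sketch for ``_average_binary_score`` (illustrative
# only, not part of this module's API): the public multilabel metrics pass
# their binary implementation as ``binary_metric``; the thresholded-accuracy
# metric below is a toy stand-in chosen purely for demonstration and ignores
# ``sample_weight``.
#
#   >>> import numpy as np
#   >>> def toy_metric(y_true, y_score, sample_weight=None):
#   ...     # ignore sample_weight; score = fraction of correct 0.5-threshold calls
#   ...     return np.mean(y_true == (y_score >= 0.5))
#   >>> y_true = np.array([[1, 0], [0, 1], [1, 1]])
#   >>> y_score = np.array([[0.9, 0.2], [0.1, 0.8], [0.7, 0.4]])
#   >>> _average_binary_score(toy_metric, y_true, y_score, average='macro')
#   0.833...  # per-class scores 1.0 and 2/3, averaged without weights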


def _average_multiclass_ovo_score(binary_metric, y_true, y_score,
                                  average='macro'):
    """Average one-versus-one scores for multiclass classification.

    Uses the binary metric for one-vs-one multiclass classification,
    where the score is computed according to the Hand & Till (2001) algorithm.

    Parameters
    ----------
    binary_metric : callable
        The binary metric function to use, which accepts the following input:
            y_true_target : array, shape = [n_samples_target]
                Some sub-array of y_true for a pair of classes designated
                positive and negative in the one-vs-one scheme.
            y_score_target : array, shape = [n_samples_target]
                Scores corresponding to the probability estimates
                of a sample belonging to the designated positive class label.

    y_true : array-like of shape (n_samples,)
        True multiclass labels.

    y_score : array-like of shape (n_samples, n_classes)
        Target scores corresponding to probability estimates of a sample
        belonging to a particular class.

    average : {'macro', 'weighted'}, default='macro'
        Determines the type of averaging performed on the pairwise binary
        metric scores:
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account. Classes
            are assumed to be uniformly distributed.
        ``'weighted'``:
            Calculate metrics for each label, taking into account the
            prevalence of the classes.

    Returns
    -------
    score : float
        Average of the pairwise binary metric scores.
    """
    check_consistent_length(y_true, y_score)

    y_true_unique = np.unique(y_true)
    n_classes = y_true_unique.shape[0]
    n_pairs = n_classes * (n_classes - 1) // 2
    pair_scores = np.empty(n_pairs)

    is_weighted = average == "weighted"
    prevalence = np.empty(n_pairs) if is_weighted else None

    # Compute scores treating a as positive class and b as negative class,
    # then b as positive class and a as negative class
    for ix, (a, b) in enumerate(combinations(y_true_unique, 2)):
        a_mask = y_true == a
        b_mask = y_true == b
        ab_mask = np.logical_or(a_mask, b_mask)

        if is_weighted:
            prevalence[ix] = np.average(ab_mask)

        a_true = a_mask[ab_mask]
        b_true = b_mask[ab_mask]

        a_true_score = binary_metric(a_true, y_score[ab_mask, a])
        b_true_score = binary_metric(b_true, y_score[ab_mask, b])
        pair_scores[ix] = (a_true_score + b_true_score) / 2

    return np.average(pair_scores, weights=prevalence)
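
# A minimal, hedged sketch of the one-vs-one averaging above (illustrative
# only): in the library this helper backs one-vs-one variants of metrics such
# as ``roc_auc_score(..., multi_class='ovo')``. The thresholded-accuracy
# metric below is a toy stand-in, and the class labels are assumed to be
# 0..n_classes-1 so they can index the columns of ``y_score``.
#
#   >>> import numpy as np
#   >>> def toy_metric(y_true, y_score):
#   ...     return np.mean(y_true == (y_score >= 0.5))
#   >>> y_true = np.array([0, 1, 2, 0, 1, 2])
#   >>> y_score = np.array([[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.1, 0.2, 0.7],
#   ...                     [0.6, 0.3, 0.1], [0.3, 0.6, 0.1], [0.2, 0.2, 0.6]])
#   >>> _average_multiclass_ovo_score(toy_metric, y_true, y_score)
#   1.0  # every class pair is perfectly separated in this toy data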


def _check_pos_label_consistency(pos_label, y_true):
    """Check if `pos_label` needs to be specified or not.

    In binary classification, we fix `pos_label=1` if the labels are in the set
    {-1, 1} or {0, 1}. Otherwise, we raise an error asking to specify the
    `pos_label` parameter.

    Parameters
    ----------
    pos_label : int, str or None
        The positive label.
    y_true : ndarray of shape (n_samples,)
        The target vector.

    Returns
    -------
    pos_label : int, float or str
        The provided `pos_label` if it was specified, otherwise ``1.0`` when
        it can be inferred.

    Raises
    ------
    ValueError
        If `pos_label` is not specified and `y_true` does not take its values
        from {-1, 1} or {0, 1}.
    """
    # ensure binary classification if pos_label is not specified
    # classes.dtype.kind in ('O', 'U', 'S') is required to avoid
    # triggering a FutureWarning by calling np.array_equal(a, b)
    # when elements in the two arrays are not comparable.
    classes = np.unique(y_true)
    if (pos_label is None and (
            classes.dtype.kind in 'OUS' or
            not (np.array_equal(classes, [0, 1]) or
                 np.array_equal(classes, [-1, 1]) or
                 np.array_equal(classes, [0]) or
                 np.array_equal(classes, [-1]) or
                 np.array_equal(classes, [1])))):
        classes_repr = ", ".join(repr(c) for c in classes)
        raise ValueError(
            f"y_true takes value in {{{classes_repr}}} and pos_label is not "
            f"specified: either make y_true take value in {{0, 1}} or "
            f"{{-1, 1}} or pass pos_label explicitly."
        )
    elif pos_label is None:
        pos_label = 1.0

    return pos_label
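
# A minimal, hedged sketch of the inference rule above (illustrative only):
# with {0, 1} or {-1, 1} labels the positive label defaults to 1, an explicit
# `pos_label` is passed through unchanged, and any other label set without an
# explicit `pos_label` raises a ValueError.
#
#   >>> import numpy as np
#   >>> _check_pos_label_consistency(None, np.array([0, 1, 1, 0]))
#   1.0
#   >>> _check_pos_label_consistency('yes', np.array(['no', 'yes']))
#   'yes'
#   >>> _check_pos_label_consistency(None, np.array(['no', 'yes']))
#   Traceback (most recent call last):
#       ...
#   ValueError: ...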