1498 lines
51 KiB
Python
1498 lines
51 KiB
Python
"""Metrics to assess performance on regression task.
|
|
|
|
Functions named as ``*_score`` return a scalar value to maximize: the higher
|
|
the better.
|
|
|
|
Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
|
|
the lower the better.
|
|
"""
|
|
|
|
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
|
|
# Mathieu Blondel <mathieu@mblondel.org>
|
|
# Olivier Grisel <olivier.grisel@ensta.org>
|
|
# Arnaud Joly <a.joly@ulg.ac.be>
|
|
# Jochen Wersdorfer <jochen@wersdoerfer.de>
|
|
# Lars Buitinck
|
|
# Joel Nothman <joel.nothman@gmail.com>
|
|
# Karan Desai <karandesai281196@gmail.com>
|
|
# Noel Dawe <noel@dawe.me>
|
|
# Manoj Kumar <manojkumarsivaraj334@gmail.com>
|
|
# Michael Eickenberg <michael.eickenberg@gmail.com>
|
|
# Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu>
|
|
# Christian Lorentzen <lorentzen.ch@gmail.com>
|
|
# Ashutosh Hathidara <ashutoshhathidara98@gmail.com>
|
|
# Uttam kumar <bajiraouttamsinha@gmail.com>
|
|
# Sylvain Marie <sylvain.marie@se.com>
|
|
# Ohad Michel <ohadmich@gmail.com>
|
|
# License: BSD 3 clause
|
|
|
|
import numbers
|
|
import warnings
|
|
|
|
import numpy as np
|
|
from scipy.special import xlogy
|
|
|
|
from ..exceptions import UndefinedMetricWarning
|
|
from ..utils.validation import (
|
|
check_array,
|
|
check_consistent_length,
|
|
check_scalar,
|
|
_num_samples,
|
|
column_or_1d,
|
|
_check_sample_weight,
|
|
)
|
|
from ..utils.stats import _weighted_percentile
|
|
|
|
|
|
__ALL__ = [
|
|
"max_error",
|
|
"mean_absolute_error",
|
|
"mean_squared_error",
|
|
"mean_squared_log_error",
|
|
"median_absolute_error",
|
|
"mean_absolute_percentage_error",
|
|
"mean_pinball_loss",
|
|
"r2_score",
|
|
"explained_variance_score",
|
|
"mean_tweedie_deviance",
|
|
"mean_poisson_deviance",
|
|
"mean_gamma_deviance",
|
|
"d2_tweedie_score",
|
|
"d2_pinball_score",
|
|
"d2_absolute_error_score",
|
|
]
|
|
|
|
|
|
def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
|
|
"""Check that y_true and y_pred belong to the same regression task.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like
|
|
|
|
y_pred : array-like
|
|
|
|
multioutput : array-like or string in ['raw_values', uniform_average',
|
|
'variance_weighted'] or None
|
|
None is accepted due to backward compatibility of r2_score().
|
|
|
|
dtype : str or list, default="numeric"
|
|
the dtype argument passed to check_array.
|
|
|
|
Returns
|
|
-------
|
|
type_true : one of {'continuous', continuous-multioutput'}
|
|
The type of the true target data, as output by
|
|
'utils.multiclass.type_of_target'.
|
|
|
|
y_true : array-like of shape (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
multioutput : array-like of shape (n_outputs) or string in ['raw_values',
|
|
uniform_average', 'variance_weighted'] or None
|
|
Custom output weights if ``multioutput`` is array-like or
|
|
just the corresponding argument if ``multioutput`` is a
|
|
correct keyword.
|
|
"""
|
|
check_consistent_length(y_true, y_pred)
|
|
y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
|
|
y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)
|
|
|
|
if y_true.ndim == 1:
|
|
y_true = y_true.reshape((-1, 1))
|
|
|
|
if y_pred.ndim == 1:
|
|
y_pred = y_pred.reshape((-1, 1))
|
|
|
|
if y_true.shape[1] != y_pred.shape[1]:
|
|
raise ValueError(
|
|
"y_true and y_pred have different number of output ({0}!={1})".format(
|
|
y_true.shape[1], y_pred.shape[1]
|
|
)
|
|
)
|
|
|
|
n_outputs = y_true.shape[1]
|
|
allowed_multioutput_str = ("raw_values", "uniform_average", "variance_weighted")
|
|
if isinstance(multioutput, str):
|
|
if multioutput not in allowed_multioutput_str:
|
|
raise ValueError(
|
|
"Allowed 'multioutput' string values are {}. "
|
|
"You provided multioutput={!r}".format(
|
|
allowed_multioutput_str, multioutput
|
|
)
|
|
)
|
|
elif multioutput is not None:
|
|
multioutput = check_array(multioutput, ensure_2d=False)
|
|
if n_outputs == 1:
|
|
raise ValueError("Custom weights are useful only in multi-output cases.")
|
|
elif n_outputs != len(multioutput):
|
|
raise ValueError(
|
|
"There must be equally many custom weights (%d) as outputs (%d)."
|
|
% (len(multioutput), n_outputs)
|
|
)
|
|
y_type = "continuous" if n_outputs == 1 else "continuous-multioutput"
|
|
|
|
return y_type, y_true, y_pred, multioutput
|
|
|
|
|
|
def mean_absolute_error(
|
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
|
|
):
|
|
"""Mean absolute error regression loss.
|
|
|
|
Read more in the :ref:`User Guide <mean_absolute_error>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
|
(n_outputs,), default='uniform_average'
|
|
Defines aggregating of multiple output values.
|
|
Array-like value defines weights used to average errors.
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Errors of all outputs are averaged with uniform weight.
|
|
|
|
Returns
|
|
-------
|
|
loss : float or ndarray of floats
|
|
If multioutput is 'raw_values', then mean absolute error is returned
|
|
for each output separately.
|
|
If multioutput is 'uniform_average' or an ndarray of weights, then the
|
|
weighted average of all output errors is returned.
|
|
|
|
MAE output is non-negative floating point. The best value is 0.0.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_absolute_error
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> mean_absolute_error(y_true, y_pred)
|
|
0.5
|
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
|
>>> mean_absolute_error(y_true, y_pred)
|
|
0.75
|
|
>>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
|
|
array([0.5, 1. ])
|
|
>>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
|
0.85...
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
output_errors = np.average(np.abs(y_pred - y_true), weights=sample_weight, axis=0)
|
|
if isinstance(multioutput, str):
|
|
if multioutput == "raw_values":
|
|
return output_errors
|
|
elif multioutput == "uniform_average":
|
|
# pass None as weights to np.average: uniform mean
|
|
multioutput = None
|
|
|
|
return np.average(output_errors, weights=multioutput)
|
|
|
|
|
|
def mean_pinball_loss(
|
|
y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average"
|
|
):
|
|
"""Pinball loss for quantile regression.
|
|
|
|
Read more in the :ref:`User Guide <pinball_loss>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
alpha : float, slope of the pinball loss, default=0.5,
|
|
This loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`,
|
|
`alpha=0.95` is minimized by estimators of the 95th percentile.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
|
(n_outputs,), default='uniform_average'
|
|
Defines aggregating of multiple output values.
|
|
Array-like value defines weights used to average errors.
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Errors of all outputs are averaged with uniform weight.
|
|
|
|
Returns
|
|
-------
|
|
loss : float or ndarray of floats
|
|
If multioutput is 'raw_values', then mean absolute error is returned
|
|
for each output separately.
|
|
If multioutput is 'uniform_average' or an ndarray of weights, then the
|
|
weighted average of all output errors is returned.
|
|
|
|
The pinball loss output is a non-negative floating point. The best
|
|
value is 0.0.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_pinball_loss
|
|
>>> y_true = [1, 2, 3]
|
|
>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)
|
|
0.03...
|
|
>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)
|
|
0.3...
|
|
>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)
|
|
0.3...
|
|
>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)
|
|
0.03...
|
|
>>> mean_pinball_loss(y_true, y_true, alpha=0.1)
|
|
0.0
|
|
>>> mean_pinball_loss(y_true, y_true, alpha=0.9)
|
|
0.0
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
diff = y_true - y_pred
|
|
sign = (diff >= 0).astype(diff.dtype)
|
|
loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff
|
|
output_errors = np.average(loss, weights=sample_weight, axis=0)
|
|
if isinstance(multioutput, str):
|
|
if multioutput == "raw_values":
|
|
return output_errors
|
|
elif multioutput == "uniform_average":
|
|
# pass None as weights to np.average: uniform mean
|
|
multioutput = None
|
|
else:
|
|
raise ValueError(
|
|
"multioutput is expected to be 'raw_values' "
|
|
"or 'uniform_average' but we got %r"
|
|
" instead." % multioutput
|
|
)
|
|
|
|
return np.average(output_errors, weights=multioutput)
|
|
|
|
|
|
def mean_absolute_percentage_error(
|
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
|
|
):
|
|
"""Mean absolute percentage error (MAPE) regression loss.
|
|
|
|
Note here that the output is not a percentage in the range [0, 100]
|
|
and a value of 100 does not mean 100% but 1e2. Furthermore, the output
|
|
can be arbitrarily high when `y_true` is small (which is specific to the
|
|
metric) or when `abs(y_true - y_pred)` is large (which is common for most
|
|
regression metrics). Read more in the
|
|
:ref:`User Guide <mean_absolute_percentage_error>`.
|
|
|
|
.. versionadded:: 0.24
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like
|
|
Defines aggregating of multiple output values.
|
|
Array-like value defines weights used to average errors.
|
|
If input is list then the shape must be (n_outputs,).
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Errors of all outputs are averaged with uniform weight.
|
|
|
|
Returns
|
|
-------
|
|
loss : float or ndarray of floats
|
|
If multioutput is 'raw_values', then mean absolute percentage error
|
|
is returned for each output separately.
|
|
If multioutput is 'uniform_average' or an ndarray of weights, then the
|
|
weighted average of all output errors is returned.
|
|
|
|
MAPE output is non-negative floating point. The best value is 0.0.
|
|
But note that bad predictions can lead to arbitrarily large
|
|
MAPE values, especially if some `y_true` values are very close to zero.
|
|
Note that we return a large value instead of `inf` when `y_true` is zero.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_absolute_percentage_error
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> mean_absolute_percentage_error(y_true, y_pred)
|
|
0.3273...
|
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
|
>>> mean_absolute_percentage_error(y_true, y_pred)
|
|
0.5515...
|
|
>>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
|
0.6198...
|
|
>>> # the value when some element of the y_true is zero is arbitrarily high because
|
|
>>> # of the division by epsilon
|
|
>>> y_true = [1., 0., 2.4, 7.]
|
|
>>> y_pred = [1.2, 0.1, 2.4, 8.]
|
|
>>> mean_absolute_percentage_error(y_true, y_pred)
|
|
112589990684262.48
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
epsilon = np.finfo(np.float64).eps
|
|
mape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon)
|
|
output_errors = np.average(mape, weights=sample_weight, axis=0)
|
|
if isinstance(multioutput, str):
|
|
if multioutput == "raw_values":
|
|
return output_errors
|
|
elif multioutput == "uniform_average":
|
|
# pass None as weights to np.average: uniform mean
|
|
multioutput = None
|
|
|
|
return np.average(output_errors, weights=multioutput)
|
|
|
|
|
|
def mean_squared_error(
|
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True
|
|
):
|
|
"""Mean squared error regression loss.
|
|
|
|
Read more in the :ref:`User Guide <mean_squared_error>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
|
(n_outputs,), default='uniform_average'
|
|
Defines aggregating of multiple output values.
|
|
Array-like value defines weights used to average errors.
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Errors of all outputs are averaged with uniform weight.
|
|
|
|
squared : bool, default=True
|
|
If True returns MSE value, if False returns RMSE value.
|
|
|
|
Returns
|
|
-------
|
|
loss : float or ndarray of floats
|
|
A non-negative floating point value (the best value is 0.0), or an
|
|
array of floating point values, one for each individual target.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_squared_error
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> mean_squared_error(y_true, y_pred)
|
|
0.375
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> mean_squared_error(y_true, y_pred, squared=False)
|
|
0.612...
|
|
>>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
|
|
>>> y_pred = [[0, 2],[-1, 2],[8, -5]]
|
|
>>> mean_squared_error(y_true, y_pred)
|
|
0.708...
|
|
>>> mean_squared_error(y_true, y_pred, squared=False)
|
|
0.822...
|
|
>>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
|
|
array([0.41666667, 1. ])
|
|
>>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
|
0.825...
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
|
|
|
|
if not squared:
|
|
output_errors = np.sqrt(output_errors)
|
|
|
|
if isinstance(multioutput, str):
|
|
if multioutput == "raw_values":
|
|
return output_errors
|
|
elif multioutput == "uniform_average":
|
|
# pass None as weights to np.average: uniform mean
|
|
multioutput = None
|
|
|
|
return np.average(output_errors, weights=multioutput)
|
|
|
|
|
|
def mean_squared_log_error(
|
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True
|
|
):
|
|
"""Mean squared logarithmic error regression loss.
|
|
|
|
Read more in the :ref:`User Guide <mean_squared_log_error>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
|
(n_outputs,), default='uniform_average'
|
|
|
|
Defines aggregating of multiple output values.
|
|
Array-like value defines weights used to average errors.
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors when the input is of multioutput
|
|
format.
|
|
|
|
'uniform_average' :
|
|
Errors of all outputs are averaged with uniform weight.
|
|
squared : bool, default=True
|
|
If True returns MSLE (mean squared log error) value.
|
|
If False returns RMSLE (root mean squared log error) value.
|
|
|
|
Returns
|
|
-------
|
|
loss : float or ndarray of floats
|
|
A non-negative floating point value (the best value is 0.0), or an
|
|
array of floating point values, one for each individual target.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_squared_log_error
|
|
>>> y_true = [3, 5, 2.5, 7]
|
|
>>> y_pred = [2.5, 5, 4, 8]
|
|
>>> mean_squared_log_error(y_true, y_pred)
|
|
0.039...
|
|
>>> mean_squared_log_error(y_true, y_pred, squared=False)
|
|
0.199...
|
|
>>> y_true = [[0.5, 1], [1, 2], [7, 6]]
|
|
>>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
|
|
>>> mean_squared_log_error(y_true, y_pred)
|
|
0.044...
|
|
>>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
|
|
array([0.00462428, 0.08377444])
|
|
>>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
|
0.060...
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
|
|
if (y_true < 0).any() or (y_pred < 0).any():
|
|
raise ValueError(
|
|
"Mean Squared Logarithmic Error cannot be used when "
|
|
"targets contain negative values."
|
|
)
|
|
|
|
return mean_squared_error(
|
|
np.log1p(y_true),
|
|
np.log1p(y_pred),
|
|
sample_weight=sample_weight,
|
|
multioutput=multioutput,
|
|
squared=squared,
|
|
)
|
|
|
|
|
|
def median_absolute_error(
|
|
y_true, y_pred, *, multioutput="uniform_average", sample_weight=None
|
|
):
|
|
"""Median absolute error regression loss.
|
|
|
|
Median absolute error output is non-negative floating point. The best value
|
|
is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
|
(n_outputs,), default='uniform_average'
|
|
Defines aggregating of multiple output values. Array-like value defines
|
|
weights used to average errors.
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Errors of all outputs are averaged with uniform weight.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
.. versionadded:: 0.24
|
|
|
|
Returns
|
|
-------
|
|
loss : float or ndarray of floats
|
|
If multioutput is 'raw_values', then mean absolute error is returned
|
|
for each output separately.
|
|
If multioutput is 'uniform_average' or an ndarray of weights, then the
|
|
weighted average of all output errors is returned.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import median_absolute_error
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> median_absolute_error(y_true, y_pred)
|
|
0.5
|
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
|
>>> median_absolute_error(y_true, y_pred)
|
|
0.75
|
|
>>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
|
|
array([0.5, 1. ])
|
|
>>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
|
0.85
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
if sample_weight is None:
|
|
output_errors = np.median(np.abs(y_pred - y_true), axis=0)
|
|
else:
|
|
sample_weight = _check_sample_weight(sample_weight, y_pred)
|
|
output_errors = _weighted_percentile(
|
|
np.abs(y_pred - y_true), sample_weight=sample_weight
|
|
)
|
|
if isinstance(multioutput, str):
|
|
if multioutput == "raw_values":
|
|
return output_errors
|
|
elif multioutput == "uniform_average":
|
|
# pass None as weights to np.average: uniform mean
|
|
multioutput = None
|
|
|
|
return np.average(output_errors, weights=multioutput)
|
|
|
|
|
|
def _assemble_r2_explained_variance(
|
|
numerator, denominator, n_outputs, multioutput, force_finite
|
|
):
|
|
"""Common part used by explained variance score and :math:`R^2` score."""
|
|
|
|
nonzero_denominator = denominator != 0
|
|
|
|
if not force_finite:
|
|
# Standard formula, that may lead to NaN or -Inf
|
|
output_scores = 1 - (numerator / denominator)
|
|
else:
|
|
nonzero_numerator = numerator != 0
|
|
# Default = Zero Numerator = perfect predictions. Set to 1.0
|
|
# (note: even if denominator is zero, thus avoiding NaN scores)
|
|
output_scores = np.ones([n_outputs])
|
|
# Non-zero Numerator and Non-zero Denominator: use the formula
|
|
valid_score = nonzero_denominator & nonzero_numerator
|
|
output_scores[valid_score] = 1 - (
|
|
numerator[valid_score] / denominator[valid_score]
|
|
)
|
|
# Non-zero Numerator and Zero Denominator:
|
|
# arbitrary set to 0.0 to avoid -inf scores
|
|
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0
|
|
|
|
if isinstance(multioutput, str):
|
|
if multioutput == "raw_values":
|
|
# return scores individually
|
|
return output_scores
|
|
elif multioutput == "uniform_average":
|
|
# Passing None as weights to np.average results is uniform mean
|
|
avg_weights = None
|
|
elif multioutput == "variance_weighted":
|
|
avg_weights = denominator
|
|
if not np.any(nonzero_denominator):
|
|
# All weights are zero, np.average would raise a ZeroDiv error.
|
|
# This only happens when all y are constant (or 1-element long)
|
|
# Since weights are all equal, fall back to uniform weights.
|
|
avg_weights = None
|
|
else:
|
|
avg_weights = multioutput
|
|
|
|
return np.average(output_scores, weights=avg_weights)
|
|
|
|
|
|
def explained_variance_score(
|
|
y_true,
|
|
y_pred,
|
|
*,
|
|
sample_weight=None,
|
|
multioutput="uniform_average",
|
|
force_finite=True,
|
|
):
|
|
"""Explained variance regression score function.
|
|
|
|
Best possible score is 1.0, lower values are worse.
|
|
|
|
In the particular case when ``y_true`` is constant, the explained variance
|
|
score is not finite: it is either ``NaN`` (perfect predictions) or
|
|
``-Inf`` (imperfect predictions). To prevent such non-finite numbers to
|
|
pollute higher-level experiments such as a grid search cross-validation,
|
|
by default these cases are replaced with 1.0 (perfect predictions) or 0.0
|
|
(imperfect predictions) respectively. If ``force_finite``
|
|
is set to ``False``, this score falls back on the original :math:`R^2`
|
|
definition.
|
|
|
|
.. note::
|
|
The Explained Variance score is similar to the
|
|
:func:`R^2 score <r2_score>`, with the notable difference that it
|
|
does not account for systematic offsets in the prediction. Most often
|
|
the :func:`R^2 score <r2_score>` should be preferred.
|
|
|
|
Read more in the :ref:`User Guide <explained_variance_score>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \
|
|
array-like of shape (n_outputs,), default='uniform_average'
|
|
Defines aggregating of multiple output scores.
|
|
Array-like value defines weights used to average scores.
|
|
|
|
'raw_values' :
|
|
Returns a full set of scores in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Scores of all outputs are averaged with uniform weight.
|
|
|
|
'variance_weighted' :
|
|
Scores of all outputs are averaged, weighted by the variances
|
|
of each individual output.
|
|
|
|
force_finite : bool, default=True
|
|
Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant
|
|
data should be replaced with real numbers (``1.0`` if prediction is
|
|
perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting
|
|
for hyperparameters' search procedures (e.g. grid search
|
|
cross-validation).
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
Returns
|
|
-------
|
|
score : float or ndarray of floats
|
|
The explained variance or ndarray if 'multioutput' is 'raw_values'.
|
|
|
|
See Also
|
|
--------
|
|
r2_score :
|
|
Similar metric, but accounting for systematic offsets in
|
|
prediction.
|
|
|
|
Notes
|
|
-----
|
|
This is not a symmetric function.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import explained_variance_score
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> explained_variance_score(y_true, y_pred)
|
|
0.957...
|
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
|
>>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')
|
|
0.983...
|
|
>>> y_true = [-2, -2, -2]
|
|
>>> y_pred = [-2, -2, -2]
|
|
>>> explained_variance_score(y_true, y_pred)
|
|
1.0
|
|
>>> explained_variance_score(y_true, y_pred, force_finite=False)
|
|
nan
|
|
>>> y_true = [-2, -2, -2]
|
|
>>> y_pred = [-2, -2, -2 + 1e-8]
|
|
>>> explained_variance_score(y_true, y_pred)
|
|
0.0
|
|
>>> explained_variance_score(y_true, y_pred, force_finite=False)
|
|
-inf
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
|
|
y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0)
|
|
numerator = np.average(
|
|
(y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0
|
|
)
|
|
|
|
y_true_avg = np.average(y_true, weights=sample_weight, axis=0)
|
|
denominator = np.average((y_true - y_true_avg) ** 2, weights=sample_weight, axis=0)
|
|
|
|
return _assemble_r2_explained_variance(
|
|
numerator=numerator,
|
|
denominator=denominator,
|
|
n_outputs=y_true.shape[1],
|
|
multioutput=multioutput,
|
|
force_finite=force_finite,
|
|
)
|
|
|
|
|
|
def r2_score(
|
|
y_true,
|
|
y_pred,
|
|
*,
|
|
sample_weight=None,
|
|
multioutput="uniform_average",
|
|
force_finite=True,
|
|
):
|
|
""":math:`R^2` (coefficient of determination) regression score function.
|
|
|
|
Best possible score is 1.0 and it can be negative (because the
|
|
model can be arbitrarily worse). In the general case when the true y is
|
|
non-constant, a constant model that always predicts the average y
|
|
disregarding the input features would get a :math:`R^2` score of 0.0.
|
|
|
|
In the particular case when ``y_true`` is constant, the :math:`R^2` score
|
|
is not finite: it is either ``NaN`` (perfect predictions) or ``-Inf``
|
|
(imperfect predictions). To prevent such non-finite numbers to pollute
|
|
higher-level experiments such as a grid search cross-validation, by default
|
|
these cases are replaced with 1.0 (perfect predictions) or 0.0 (imperfect
|
|
predictions) respectively. You can set ``force_finite`` to ``False`` to
|
|
prevent this fix from happening.
|
|
|
|
Note: when the prediction residuals have zero mean, the :math:`R^2` score
|
|
is identical to the
|
|
:func:`Explained Variance score <explained_variance_score>`.
|
|
|
|
Read more in the :ref:`User Guide <r2_score>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \
|
|
array-like of shape (n_outputs,) or None, default='uniform_average'
|
|
|
|
Defines aggregating of multiple output scores.
|
|
Array-like value defines weights used to average scores.
|
|
Default is "uniform_average".
|
|
|
|
'raw_values' :
|
|
Returns a full set of scores in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Scores of all outputs are averaged with uniform weight.
|
|
|
|
'variance_weighted' :
|
|
Scores of all outputs are averaged, weighted by the variances
|
|
of each individual output.
|
|
|
|
.. versionchanged:: 0.19
|
|
Default value of multioutput is 'uniform_average'.
|
|
|
|
force_finite : bool, default=True
|
|
Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant
|
|
data should be replaced with real numbers (``1.0`` if prediction is
|
|
perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting
|
|
for hyperparameters' search procedures (e.g. grid search
|
|
cross-validation).
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
Returns
|
|
-------
|
|
z : float or ndarray of floats
|
|
The :math:`R^2` score or ndarray of scores if 'multioutput' is
|
|
'raw_values'.
|
|
|
|
Notes
|
|
-----
|
|
This is not a symmetric function.
|
|
|
|
Unlike most other scores, :math:`R^2` score may be negative (it need not
|
|
actually be the square of a quantity R).
|
|
|
|
This metric is not well-defined for single samples and will return a NaN
|
|
value if n_samples is less than two.
|
|
|
|
References
|
|
----------
|
|
.. [1] `Wikipedia entry on the Coefficient of determination
|
|
<https://en.wikipedia.org/wiki/Coefficient_of_determination>`_
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import r2_score
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> r2_score(y_true, y_pred)
|
|
0.948...
|
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
|
>>> r2_score(y_true, y_pred,
|
|
... multioutput='variance_weighted')
|
|
0.938...
|
|
>>> y_true = [1, 2, 3]
|
|
>>> y_pred = [1, 2, 3]
|
|
>>> r2_score(y_true, y_pred)
|
|
1.0
|
|
>>> y_true = [1, 2, 3]
|
|
>>> y_pred = [2, 2, 2]
|
|
>>> r2_score(y_true, y_pred)
|
|
0.0
|
|
>>> y_true = [1, 2, 3]
|
|
>>> y_pred = [3, 2, 1]
|
|
>>> r2_score(y_true, y_pred)
|
|
-3.0
|
|
>>> y_true = [-2, -2, -2]
|
|
>>> y_pred = [-2, -2, -2]
|
|
>>> r2_score(y_true, y_pred)
|
|
1.0
|
|
>>> r2_score(y_true, y_pred, force_finite=False)
|
|
nan
|
|
>>> y_true = [-2, -2, -2]
|
|
>>> y_pred = [-2, -2, -2 + 1e-8]
|
|
>>> r2_score(y_true, y_pred)
|
|
0.0
|
|
>>> r2_score(y_true, y_pred, force_finite=False)
|
|
-inf
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
|
|
if _num_samples(y_pred) < 2:
|
|
msg = "R^2 score is not well-defined with less than two samples."
|
|
warnings.warn(msg, UndefinedMetricWarning)
|
|
return float("nan")
|
|
|
|
if sample_weight is not None:
|
|
sample_weight = column_or_1d(sample_weight)
|
|
weight = sample_weight[:, np.newaxis]
|
|
else:
|
|
weight = 1.0
|
|
|
|
numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
|
|
denominator = (
|
|
weight * (y_true - np.average(y_true, axis=0, weights=sample_weight)) ** 2
|
|
).sum(axis=0, dtype=np.float64)
|
|
|
|
return _assemble_r2_explained_variance(
|
|
numerator=numerator,
|
|
denominator=denominator,
|
|
n_outputs=y_true.shape[1],
|
|
multioutput=multioutput,
|
|
force_finite=force_finite,
|
|
)
|
|
|
|
|
|
def max_error(y_true, y_pred):
|
|
"""
|
|
The max_error metric calculates the maximum residual error.
|
|
|
|
Read more in the :ref:`User Guide <max_error>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,)
|
|
Estimated target values.
|
|
|
|
Returns
|
|
-------
|
|
max_error : float
|
|
A positive floating point value (the best value is 0.0).
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import max_error
|
|
>>> y_true = [3, 2, 7, 1]
|
|
>>> y_pred = [4, 2, 7, 1]
|
|
>>> max_error(y_true, y_pred)
|
|
1
|
|
"""
|
|
y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None)
|
|
if y_type == "continuous-multioutput":
|
|
raise ValueError("Multioutput not supported in max_error")
|
|
return np.max(np.abs(y_true - y_pred))
|
|
|
|
|
|
def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power):
|
|
"""Mean Tweedie deviance regression loss."""
|
|
p = power
|
|
if p < 0:
|
|
# 'Extreme stable', y any real number, y_pred > 0
|
|
dev = 2 * (
|
|
np.power(np.maximum(y_true, 0), 2 - p) / ((1 - p) * (2 - p))
|
|
- y_true * np.power(y_pred, 1 - p) / (1 - p)
|
|
+ np.power(y_pred, 2 - p) / (2 - p)
|
|
)
|
|
elif p == 0:
|
|
# Normal distribution, y and y_pred any real number
|
|
dev = (y_true - y_pred) ** 2
|
|
elif p == 1:
|
|
# Poisson distribution
|
|
dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred)
|
|
elif p == 2:
|
|
# Gamma distribution
|
|
dev = 2 * (np.log(y_pred / y_true) + y_true / y_pred - 1)
|
|
else:
|
|
dev = 2 * (
|
|
np.power(y_true, 2 - p) / ((1 - p) * (2 - p))
|
|
- y_true * np.power(y_pred, 1 - p) / (1 - p)
|
|
+ np.power(y_pred, 2 - p) / (2 - p)
|
|
)
|
|
|
|
return np.average(dev, weights=sample_weight)
|
|
|
|
|
|
def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0):
|
|
"""Mean Tweedie deviance regression loss.
|
|
|
|
Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
power : float, default=0
|
|
Tweedie power parameter. Either power <= 0 or power >= 1.
|
|
|
|
The higher `p` the less weight is given to extreme
|
|
deviations between true and predicted targets.
|
|
|
|
- power < 0: Extreme stable distribution. Requires: y_pred > 0.
|
|
- power = 0 : Normal distribution, output corresponds to
|
|
mean_squared_error. y_true and y_pred can be any real numbers.
|
|
- power = 1 : Poisson distribution. Requires: y_true >= 0 and
|
|
y_pred > 0.
|
|
- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
|
|
and y_pred > 0.
|
|
- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
|
|
- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0
|
|
and y_pred > 0.
|
|
- otherwise : Positive stable distribution. Requires: y_true > 0
|
|
and y_pred > 0.
|
|
|
|
Returns
|
|
-------
|
|
loss : float
|
|
A non-negative floating point value (the best value is 0.0).
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_tweedie_deviance
|
|
>>> y_true = [2, 0, 1, 4]
|
|
>>> y_pred = [0.5, 0.5, 2., 2.]
|
|
>>> mean_tweedie_deviance(y_true, y_pred, power=1)
|
|
1.4260...
|
|
"""
|
|
y_type, y_true, y_pred, _ = _check_reg_targets(
|
|
y_true, y_pred, None, dtype=[np.float64, np.float32]
|
|
)
|
|
if y_type == "continuous-multioutput":
|
|
raise ValueError("Multioutput not supported in mean_tweedie_deviance")
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
|
|
if sample_weight is not None:
|
|
sample_weight = column_or_1d(sample_weight)
|
|
sample_weight = sample_weight[:, np.newaxis]
|
|
|
|
p = check_scalar(
|
|
power,
|
|
name="power",
|
|
target_type=numbers.Real,
|
|
)
|
|
|
|
message = f"Mean Tweedie deviance error with power={p} can only be used on "
|
|
if p < 0:
|
|
# 'Extreme stable', y any real number, y_pred > 0
|
|
if (y_pred <= 0).any():
|
|
raise ValueError(message + "strictly positive y_pred.")
|
|
elif p == 0:
|
|
# Normal, y and y_pred can be any real number
|
|
pass
|
|
elif 0 < p < 1:
|
|
raise ValueError("Tweedie deviance is only defined for power<=0 and power>=1.")
|
|
elif 1 <= p < 2:
|
|
# Poisson and compound Poisson distribution, y >= 0, y_pred > 0
|
|
if (y_true < 0).any() or (y_pred <= 0).any():
|
|
raise ValueError(message + "non-negative y and strictly positive y_pred.")
|
|
elif p >= 2:
|
|
# Gamma and Extreme stable distribution, y and y_pred > 0
|
|
if (y_true <= 0).any() or (y_pred <= 0).any():
|
|
raise ValueError(message + "strictly positive y and y_pred.")
|
|
else: # pragma: nocover
|
|
# Unreachable statement
|
|
raise ValueError
|
|
|
|
return _mean_tweedie_deviance(
|
|
y_true, y_pred, sample_weight=sample_weight, power=power
|
|
)
|
|
|
|
|
|
def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None):
|
|
"""Mean Poisson deviance regression loss.
|
|
|
|
Poisson deviance is equivalent to the Tweedie deviance with
|
|
the power parameter `power=1`.
|
|
|
|
Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,)
|
|
Ground truth (correct) target values. Requires y_true >= 0.
|
|
|
|
y_pred : array-like of shape (n_samples,)
|
|
Estimated target values. Requires y_pred > 0.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
Returns
|
|
-------
|
|
loss : float
|
|
A non-negative floating point value (the best value is 0.0).
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_poisson_deviance
|
|
>>> y_true = [2, 0, 1, 4]
|
|
>>> y_pred = [0.5, 0.5, 2., 2.]
|
|
>>> mean_poisson_deviance(y_true, y_pred)
|
|
1.4260...
|
|
"""
|
|
return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=1)
|
|
|
|
|
|
def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None):
|
|
"""Mean Gamma deviance regression loss.
|
|
|
|
Gamma deviance is equivalent to the Tweedie deviance with
|
|
the power parameter `power=2`. It is invariant to scaling of
|
|
the target variable, and measures relative errors.
|
|
|
|
Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,)
|
|
Ground truth (correct) target values. Requires y_true > 0.
|
|
|
|
y_pred : array-like of shape (n_samples,)
|
|
Estimated target values. Requires y_pred > 0.
|
|
|
|
sample_weight : array-like of shape (n_samples,), default=None
|
|
Sample weights.
|
|
|
|
Returns
|
|
-------
|
|
loss : float
|
|
A non-negative floating point value (the best value is 0.0).
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import mean_gamma_deviance
|
|
>>> y_true = [2, 0.5, 1, 4]
|
|
>>> y_pred = [0.5, 0.5, 2., 2.]
|
|
>>> mean_gamma_deviance(y_true, y_pred)
|
|
1.0568...
|
|
"""
|
|
return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=2)
|
|
|
|
|
|
def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0):
|
|
"""D^2 regression score function, fraction of Tweedie deviance explained.
|
|
|
|
Best possible score is 1.0 and it can be negative (because the model can be
|
|
arbitrarily worse). A model that always uses the empirical mean of `y_true` as
|
|
constant prediction, disregarding the input features, gets a D^2 score of 0.0.
|
|
|
|
Read more in the :ref:`User Guide <d2_score>`.
|
|
|
|
.. versionadded:: 1.0
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), optional
|
|
Sample weights.
|
|
|
|
power : float, default=0
|
|
Tweedie power parameter. Either power <= 0 or power >= 1.
|
|
|
|
The higher `p` the less weight is given to extreme
|
|
deviations between true and predicted targets.
|
|
|
|
- power < 0: Extreme stable distribution. Requires: y_pred > 0.
|
|
- power = 0 : Normal distribution, output corresponds to r2_score.
|
|
y_true and y_pred can be any real numbers.
|
|
- power = 1 : Poisson distribution. Requires: y_true >= 0 and
|
|
y_pred > 0.
|
|
- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
|
|
and y_pred > 0.
|
|
- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
|
|
- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0
|
|
and y_pred > 0.
|
|
- otherwise : Positive stable distribution. Requires: y_true > 0
|
|
and y_pred > 0.
|
|
|
|
Returns
|
|
-------
|
|
z : float or ndarray of floats
|
|
The D^2 score.
|
|
|
|
Notes
|
|
-----
|
|
This is not a symmetric function.
|
|
|
|
Like R^2, D^2 score may be negative (it need not actually be the square of
|
|
a quantity D).
|
|
|
|
This metric is not well-defined for single samples and will return a NaN
|
|
value if n_samples is less than two.
|
|
|
|
References
|
|
----------
|
|
.. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
|
|
Wainwright. "Statistical Learning with Sparsity: The Lasso and
|
|
Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import d2_tweedie_score
|
|
>>> y_true = [0.5, 1, 2.5, 7]
|
|
>>> y_pred = [1, 1, 5, 3.5]
|
|
>>> d2_tweedie_score(y_true, y_pred)
|
|
0.285...
|
|
>>> d2_tweedie_score(y_true, y_pred, power=1)
|
|
0.487...
|
|
>>> d2_tweedie_score(y_true, y_pred, power=2)
|
|
0.630...
|
|
>>> d2_tweedie_score(y_true, y_true, power=2)
|
|
1.0
|
|
"""
|
|
y_type, y_true, y_pred, _ = _check_reg_targets(
|
|
y_true, y_pred, None, dtype=[np.float64, np.float32]
|
|
)
|
|
if y_type == "continuous-multioutput":
|
|
raise ValueError("Multioutput not supported in d2_tweedie_score")
|
|
|
|
if _num_samples(y_pred) < 2:
|
|
msg = "D^2 score is not well-defined with less than two samples."
|
|
warnings.warn(msg, UndefinedMetricWarning)
|
|
return float("nan")
|
|
|
|
y_true, y_pred = np.squeeze(y_true), np.squeeze(y_pred)
|
|
numerator = mean_tweedie_deviance(
|
|
y_true, y_pred, sample_weight=sample_weight, power=power
|
|
)
|
|
|
|
y_avg = np.average(y_true, weights=sample_weight)
|
|
denominator = _mean_tweedie_deviance(
|
|
y_true, y_avg, sample_weight=sample_weight, power=power
|
|
)
|
|
|
|
return 1 - numerator / denominator
|
|
|
|
|
|
def d2_pinball_score(
|
|
y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average"
|
|
):
|
|
"""
|
|
:math:`D^2` regression score function, fraction of pinball loss explained.
|
|
|
|
Best possible score is 1.0 and it can be negative (because the model can be
|
|
arbitrarily worse). A model that always uses the empirical alpha-quantile of
|
|
`y_true` as constant prediction, disregarding the input features,
|
|
gets a :math:`D^2` score of 0.0.
|
|
|
|
Read more in the :ref:`User Guide <d2_score>`.
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), optional
|
|
Sample weights.
|
|
|
|
alpha : float, default=0.5
|
|
Slope of the pinball deviance. It determines the quantile level alpha
|
|
for which the pinball deviance and also D2 are optimal.
|
|
The default `alpha=0.5` is equivalent to `d2_absolute_error_score`.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
|
(n_outputs,), default='uniform_average'
|
|
Defines aggregating of multiple output values.
|
|
Array-like value defines weights used to average scores.
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Scores of all outputs are averaged with uniform weight.
|
|
|
|
Returns
|
|
-------
|
|
score : float or ndarray of floats
|
|
The :math:`D^2` score with a pinball deviance
|
|
or ndarray of scores if `multioutput='raw_values'`.
|
|
|
|
Notes
|
|
-----
|
|
Like :math:`R^2`, :math:`D^2` score may be negative
|
|
(it need not actually be the square of a quantity D).
|
|
|
|
This metric is not well-defined for a single point and will return a NaN
|
|
value if n_samples is less than two.
|
|
|
|
References
|
|
----------
|
|
.. [1] Eq. (7) of `Koenker, Roger; Machado, José A. F. (1999).
|
|
"Goodness of Fit and Related Inference Processes for Quantile Regression"
|
|
<http://dx.doi.org/10.1080/01621459.1999.10473882>`_
|
|
.. [2] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
|
|
Wainwright. "Statistical Learning with Sparsity: The Lasso and
|
|
Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import d2_pinball_score
|
|
>>> y_true = [1, 2, 3]
|
|
>>> y_pred = [1, 3, 3]
|
|
>>> d2_pinball_score(y_true, y_pred)
|
|
0.5
|
|
>>> d2_pinball_score(y_true, y_pred, alpha=0.9)
|
|
0.772...
|
|
>>> d2_pinball_score(y_true, y_pred, alpha=0.1)
|
|
-1.045...
|
|
>>> d2_pinball_score(y_true, y_true, alpha=0.1)
|
|
1.0
|
|
"""
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
|
y_true, y_pred, multioutput
|
|
)
|
|
check_consistent_length(y_true, y_pred, sample_weight)
|
|
|
|
if _num_samples(y_pred) < 2:
|
|
msg = "D^2 score is not well-defined with less than two samples."
|
|
warnings.warn(msg, UndefinedMetricWarning)
|
|
return float("nan")
|
|
|
|
numerator = mean_pinball_loss(
|
|
y_true,
|
|
y_pred,
|
|
sample_weight=sample_weight,
|
|
alpha=alpha,
|
|
multioutput="raw_values",
|
|
)
|
|
|
|
if sample_weight is None:
|
|
y_quantile = np.tile(
|
|
np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1)
|
|
)
|
|
else:
|
|
sample_weight = _check_sample_weight(sample_weight, y_true)
|
|
y_quantile = np.tile(
|
|
_weighted_percentile(
|
|
y_true, sample_weight=sample_weight, percentile=alpha * 100
|
|
),
|
|
(len(y_true), 1),
|
|
)
|
|
|
|
denominator = mean_pinball_loss(
|
|
y_true,
|
|
y_quantile,
|
|
sample_weight=sample_weight,
|
|
alpha=alpha,
|
|
multioutput="raw_values",
|
|
)
|
|
|
|
nonzero_numerator = numerator != 0
|
|
nonzero_denominator = denominator != 0
|
|
valid_score = nonzero_numerator & nonzero_denominator
|
|
output_scores = np.ones(y_true.shape[1])
|
|
|
|
output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score])
|
|
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0
|
|
|
|
if isinstance(multioutput, str):
|
|
if multioutput == "raw_values":
|
|
# return scores individually
|
|
return output_scores
|
|
elif multioutput == "uniform_average":
|
|
# passing None as weights to np.average results in uniform mean
|
|
avg_weights = None
|
|
else:
|
|
raise ValueError(
|
|
"multioutput is expected to be 'raw_values' "
|
|
"or 'uniform_average' but we got %r"
|
|
" instead." % multioutput
|
|
)
|
|
else:
|
|
avg_weights = multioutput
|
|
|
|
return np.average(output_scores, weights=avg_weights)
|
|
|
|
|
|
def d2_absolute_error_score(
|
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
|
|
):
|
|
"""
|
|
:math:`D^2` regression score function, \
|
|
fraction of absolute error explained.
|
|
|
|
Best possible score is 1.0 and it can be negative (because the model can be
|
|
arbitrarily worse). A model that always uses the empirical median of `y_true`
|
|
as constant prediction, disregarding the input features,
|
|
gets a :math:`D^2` score of 0.0.
|
|
|
|
Read more in the :ref:`User Guide <d2_score>`.
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
Parameters
|
|
----------
|
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Ground truth (correct) target values.
|
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
Estimated target values.
|
|
|
|
sample_weight : array-like of shape (n_samples,), optional
|
|
Sample weights.
|
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
|
(n_outputs,), default='uniform_average'
|
|
Defines aggregating of multiple output values.
|
|
Array-like value defines weights used to average scores.
|
|
|
|
'raw_values' :
|
|
Returns a full set of errors in case of multioutput input.
|
|
|
|
'uniform_average' :
|
|
Scores of all outputs are averaged with uniform weight.
|
|
|
|
Returns
|
|
-------
|
|
score : float or ndarray of floats
|
|
The :math:`D^2` score with an absolute error deviance
|
|
or ndarray of scores if 'multioutput' is 'raw_values'.
|
|
|
|
Notes
|
|
-----
|
|
Like :math:`R^2`, :math:`D^2` score may be negative
|
|
(it need not actually be the square of a quantity D).
|
|
|
|
This metric is not well-defined for single samples and will return a NaN
|
|
value if n_samples is less than two.
|
|
|
|
References
|
|
----------
|
|
.. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
|
|
Wainwright. "Statistical Learning with Sparsity: The Lasso and
|
|
Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.metrics import d2_absolute_error_score
|
|
>>> y_true = [3, -0.5, 2, 7]
|
|
>>> y_pred = [2.5, 0.0, 2, 8]
|
|
>>> d2_absolute_error_score(y_true, y_pred)
|
|
0.764...
|
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
|
>>> d2_absolute_error_score(y_true, y_pred, multioutput='uniform_average')
|
|
0.691...
|
|
>>> d2_absolute_error_score(y_true, y_pred, multioutput='raw_values')
|
|
array([0.8125 , 0.57142857])
|
|
>>> y_true = [1, 2, 3]
|
|
>>> y_pred = [1, 2, 3]
|
|
>>> d2_absolute_error_score(y_true, y_pred)
|
|
1.0
|
|
>>> y_true = [1, 2, 3]
|
|
>>> y_pred = [2, 2, 2]
|
|
>>> d2_absolute_error_score(y_true, y_pred)
|
|
0.0
|
|
>>> y_true = [1, 2, 3]
|
|
>>> y_pred = [3, 2, 1]
|
|
>>> d2_absolute_error_score(y_true, y_pred)
|
|
-1.0
|
|
"""
|
|
return d2_pinball_score(
|
|
y_true, y_pred, sample_weight=sample_weight, alpha=0.5, multioutput=multioutput
|
|
)
|