"""Metrics to assess performance on regression task. Functions named as ``*_score`` return a scalar value to maximize: the higher the better. Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize: the lower the better. """ # Authors: Alexandre Gramfort # Mathieu Blondel # Olivier Grisel # Arnaud Joly # Jochen Wersdorfer # Lars Buitinck # Joel Nothman # Karan Desai # Noel Dawe # Manoj Kumar # Michael Eickenberg # Konstantin Shmelkov # Christian Lorentzen # Ashutosh Hathidara # Uttam kumar # Sylvain Marie # Ohad Michel # License: BSD 3 clause import numbers import warnings import numpy as np from scipy.special import xlogy from ..exceptions import UndefinedMetricWarning from ..utils.validation import ( check_array, check_consistent_length, check_scalar, _num_samples, column_or_1d, _check_sample_weight, ) from ..utils.stats import _weighted_percentile __ALL__ = [ "max_error", "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "mean_absolute_percentage_error", "mean_pinball_loss", "r2_score", "explained_variance_score", "mean_tweedie_deviance", "mean_poisson_deviance", "mean_gamma_deviance", "d2_tweedie_score", "d2_pinball_score", "d2_absolute_error_score", ] def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"): """Check that y_true and y_pred belong to the same regression task. Parameters ---------- y_true : array-like y_pred : array-like multioutput : array-like or string in ['raw_values', uniform_average', 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). dtype : str or list, default="numeric" the dtype argument passed to check_array. Returns ------- type_true : one of {'continuous', continuous-multioutput'} The type of the true target data, as output by 'utils.multiclass.type_of_target'. y_true : array-like of shape (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. multioutput : array-like of shape (n_outputs) or string in ['raw_values', uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or just the corresponding argument if ``multioutput`` is a correct keyword. """ check_consistent_length(y_true, y_pred) y_true = check_array(y_true, ensure_2d=False, dtype=dtype) y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) if y_true.ndim == 1: y_true = y_true.reshape((-1, 1)) if y_pred.ndim == 1: y_pred = y_pred.reshape((-1, 1)) if y_true.shape[1] != y_pred.shape[1]: raise ValueError( "y_true and y_pred have different number of output ({0}!={1})".format( y_true.shape[1], y_pred.shape[1] ) ) n_outputs = y_true.shape[1] allowed_multioutput_str = ("raw_values", "uniform_average", "variance_weighted") if isinstance(multioutput, str): if multioutput not in allowed_multioutput_str: raise ValueError( "Allowed 'multioutput' string values are {}. " "You provided multioutput={!r}".format( allowed_multioutput_str, multioutput ) ) elif multioutput is not None: multioutput = check_array(multioutput, ensure_2d=False) if n_outputs == 1: raise ValueError("Custom weights are useful only in multi-output cases.") elif n_outputs != len(multioutput): raise ValueError( "There must be equally many custom weights (%d) as outputs (%d)." % (len(multioutput), n_outputs) ) y_type = "continuous" if n_outputs == 1 else "continuous-multioutput" return y_type, y_true, y_pred, multioutput def mean_absolute_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Mean absolute error regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. MAE output is non-negative floating point. The best value is 0.0. Examples -------- >>> from sklearn.metrics import mean_absolute_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_absolute_error(y_true, y_pred) 0.5 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> mean_absolute_error(y_true, y_pred) 0.75 >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values') array([0.5, 1. ]) >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average(np.abs(y_pred - y_true), weights=sample_weight, axis=0) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput) def mean_pinball_loss( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" ): """Pinball loss for quantile regression. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. alpha : float, slope of the pinball loss, default=0.5, This loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`, `alpha=0.95` is minimized by estimators of the 95th percentile. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. The pinball loss output is a non-negative floating point. The best value is 0.0. Examples -------- >>> from sklearn.metrics import mean_pinball_loss >>> y_true = [1, 2, 3] >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1) 0.03... >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1) 0.3... >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9) 0.3... >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9) 0.03... >>> mean_pinball_loss(y_true, y_true, alpha=0.1) 0.0 >>> mean_pinball_loss(y_true, y_true, alpha=0.9) 0.0 """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) diff = y_true - y_pred sign = (diff >= 0).astype(diff.dtype) loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff output_errors = np.average(loss, weights=sample_weight, axis=0) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to np.average: uniform mean multioutput = None else: raise ValueError( "multioutput is expected to be 'raw_values' " "or 'uniform_average' but we got %r" " instead." % multioutput ) return np.average(output_errors, weights=multioutput) def mean_absolute_percentage_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Mean absolute percentage error (MAPE) regression loss. Note here that the output is not a percentage in the range [0, 100] and a value of 100 does not mean 100% but 1e2. Furthermore, the output can be arbitrarily high when `y_true` is small (which is specific to the metric) or when `abs(y_true - y_pred)` is large (which is common for most regression metrics). Read more in the :ref:`User Guide `. .. versionadded:: 0.24 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like Defines aggregating of multiple output values. Array-like value defines weights used to average errors. If input is list then the shape must be (n_outputs,). 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. MAPE output is non-negative floating point. The best value is 0.0. But note that bad predictions can lead to arbitrarily large MAPE values, especially if some `y_true` values are very close to zero. Note that we return a large value instead of `inf` when `y_true` is zero. Examples -------- >>> from sklearn.metrics import mean_absolute_percentage_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_absolute_percentage_error(y_true, y_pred) 0.3273... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> mean_absolute_percentage_error(y_true, y_pred) 0.5515... >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.6198... >>> # the value when some element of the y_true is zero is arbitrarily high because >>> # of the division by epsilon >>> y_true = [1., 0., 2.4, 7.] >>> y_pred = [1.2, 0.1, 2.4, 8.] >>> mean_absolute_percentage_error(y_true, y_pred) 112589990684262.48 """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) epsilon = np.finfo(np.float64).eps mape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon) output_errors = np.average(mape, weights=sample_weight, axis=0) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput) def mean_squared_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True ): """Mean squared error regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. squared : bool, default=True If True returns MSE value, if False returns RMSE value. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_squared_error(y_true, y_pred) 0.375 >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_squared_error(y_true, y_pred, squared=False) 0.612... >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> mean_squared_error(y_true, y_pred) 0.708... >>> mean_squared_error(y_true, y_pred, squared=False) 0.822... >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') array([0.41666667, 1. ]) >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.825... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) if not squared: output_errors = np.sqrt(output_errors) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput) def mean_squared_log_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True ): """Mean squared logarithmic error regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors when the input is of multioutput format. 'uniform_average' : Errors of all outputs are averaged with uniform weight. squared : bool, default=True If True returns MSLE (mean squared log error) value. If False returns RMSLE (root mean squared log error) value. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_log_error >>> y_true = [3, 5, 2.5, 7] >>> y_pred = [2.5, 5, 4, 8] >>> mean_squared_log_error(y_true, y_pred) 0.039... >>> mean_squared_log_error(y_true, y_pred, squared=False) 0.199... >>> y_true = [[0.5, 1], [1, 2], [7, 6]] >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]] >>> mean_squared_log_error(y_true, y_pred) 0.044... >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values') array([0.00462428, 0.08377444]) >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.060... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) if (y_true < 0).any() or (y_pred < 0).any(): raise ValueError( "Mean Squared Logarithmic Error cannot be used when " "targets contain negative values." ) return mean_squared_error( np.log1p(y_true), np.log1p(y_pred), sample_weight=sample_weight, multioutput=multioutput, squared=squared, ) def median_absolute_error( y_true, y_pred, *, multioutput="uniform_average", sample_weight=None ): """Median absolute error regression loss. Median absolute error output is non-negative floating point. The best value is 0.0. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape = (n_samples) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. sample_weight : array-like of shape (n_samples,), default=None Sample weights. .. versionadded:: 0.24 Returns ------- loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. Examples -------- >>> from sklearn.metrics import median_absolute_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> median_absolute_error(y_true, y_pred) 0.5 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> median_absolute_error(y_true, y_pred) 0.75 >>> median_absolute_error(y_true, y_pred, multioutput='raw_values') array([0.5, 1. ]) >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85 """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) if sample_weight is None: output_errors = np.median(np.abs(y_pred - y_true), axis=0) else: sample_weight = _check_sample_weight(sample_weight, y_pred) output_errors = _weighted_percentile( np.abs(y_pred - y_true), sample_weight=sample_weight ) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput) def _assemble_r2_explained_variance( numerator, denominator, n_outputs, multioutput, force_finite ): """Common part used by explained variance score and :math:`R^2` score.""" nonzero_denominator = denominator != 0 if not force_finite: # Standard formula, that may lead to NaN or -Inf output_scores = 1 - (numerator / denominator) else: nonzero_numerator = numerator != 0 # Default = Zero Numerator = perfect predictions. Set to 1.0 # (note: even if denominator is zero, thus avoiding NaN scores) output_scores = np.ones([n_outputs]) # Non-zero Numerator and Non-zero Denominator: use the formula valid_score = nonzero_denominator & nonzero_numerator output_scores[valid_score] = 1 - ( numerator[valid_score] / denominator[valid_score] ) # Non-zero Numerator and Zero Denominator: # arbitrary set to 0.0 to avoid -inf scores output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0 if isinstance(multioutput, str): if multioutput == "raw_values": # return scores individually return output_scores elif multioutput == "uniform_average": # Passing None as weights to np.average results is uniform mean avg_weights = None elif multioutput == "variance_weighted": avg_weights = denominator if not np.any(nonzero_denominator): # All weights are zero, np.average would raise a ZeroDiv error. # This only happens when all y are constant (or 1-element long) # Since weights are all equal, fall back to uniform weights. avg_weights = None else: avg_weights = multioutput return np.average(output_scores, weights=avg_weights) def explained_variance_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", force_finite=True, ): """Explained variance regression score function. Best possible score is 1.0, lower values are worse. In the particular case when ``y_true`` is constant, the explained variance score is not finite: it is either ``NaN`` (perfect predictions) or ``-Inf`` (imperfect predictions). To prevent such non-finite numbers to pollute higher-level experiments such as a grid search cross-validation, by default these cases are replaced with 1.0 (perfect predictions) or 0.0 (imperfect predictions) respectively. If ``force_finite`` is set to ``False``, this score falls back on the original :math:`R^2` definition. .. note:: The Explained Variance score is similar to the :func:`R^2 score `, with the notable difference that it does not account for systematic offsets in the prediction. Most often the :func:`R^2 score ` should be preferred. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \ array-like of shape (n_outputs,), default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. force_finite : bool, default=True Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant data should be replaced with real numbers (``1.0`` if prediction is perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting for hyperparameters' search procedures (e.g. grid search cross-validation). .. versionadded:: 1.1 Returns ------- score : float or ndarray of floats The explained variance or ndarray if 'multioutput' is 'raw_values'. See Also -------- r2_score : Similar metric, but accounting for systematic offsets in prediction. Notes ----- This is not a symmetric function. Examples -------- >>> from sklearn.metrics import explained_variance_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> explained_variance_score(y_true, y_pred) 0.957... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average') 0.983... >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2] >>> explained_variance_score(y_true, y_pred) 1.0 >>> explained_variance_score(y_true, y_pred, force_finite=False) nan >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2 + 1e-8] >>> explained_variance_score(y_true, y_pred) 0.0 >>> explained_variance_score(y_true, y_pred, force_finite=False) -inf """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0) numerator = np.average( (y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0 ) y_true_avg = np.average(y_true, weights=sample_weight, axis=0) denominator = np.average((y_true - y_true_avg) ** 2, weights=sample_weight, axis=0) return _assemble_r2_explained_variance( numerator=numerator, denominator=denominator, n_outputs=y_true.shape[1], multioutput=multioutput, force_finite=force_finite, ) def r2_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", force_finite=True, ): """:math:`R^2` (coefficient of determination) regression score function. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). In the general case when the true y is non-constant, a constant model that always predicts the average y disregarding the input features would get a :math:`R^2` score of 0.0. In the particular case when ``y_true`` is constant, the :math:`R^2` score is not finite: it is either ``NaN`` (perfect predictions) or ``-Inf`` (imperfect predictions). To prevent such non-finite numbers to pollute higher-level experiments such as a grid search cross-validation, by default these cases are replaced with 1.0 (perfect predictions) or 0.0 (imperfect predictions) respectively. You can set ``force_finite`` to ``False`` to prevent this fix from happening. Note: when the prediction residuals have zero mean, the :math:`R^2` score is identical to the :func:`Explained Variance score `. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \ array-like of shape (n_outputs,) or None, default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. Default is "uniform_average". 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. .. versionchanged:: 0.19 Default value of multioutput is 'uniform_average'. force_finite : bool, default=True Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant data should be replaced with real numbers (``1.0`` if prediction is perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting for hyperparameters' search procedures (e.g. grid search cross-validation). .. versionadded:: 1.1 Returns ------- z : float or ndarray of floats The :math:`R^2` score or ndarray of scores if 'multioutput' is 'raw_values'. Notes ----- This is not a symmetric function. Unlike most other scores, :math:`R^2` score may be negative (it need not actually be the square of a quantity R). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] `Wikipedia entry on the Coefficient of determination `_ Examples -------- >>> from sklearn.metrics import r2_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> r2_score(y_true, y_pred) 0.948... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> r2_score(y_true, y_pred, ... multioutput='variance_weighted') 0.938... >>> y_true = [1, 2, 3] >>> y_pred = [1, 2, 3] >>> r2_score(y_true, y_pred) 1.0 >>> y_true = [1, 2, 3] >>> y_pred = [2, 2, 2] >>> r2_score(y_true, y_pred) 0.0 >>> y_true = [1, 2, 3] >>> y_pred = [3, 2, 1] >>> r2_score(y_true, y_pred) -3.0 >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2] >>> r2_score(y_true, y_pred) 1.0 >>> r2_score(y_true, y_pred, force_finite=False) nan >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2 + 1e-8] >>> r2_score(y_true, y_pred) 0.0 >>> r2_score(y_true, y_pred, force_finite=False) -inf """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) if _num_samples(y_pred) < 2: msg = "R^2 score is not well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) return float("nan") if sample_weight is not None: sample_weight = column_or_1d(sample_weight) weight = sample_weight[:, np.newaxis] else: weight = 1.0 numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64) denominator = ( weight * (y_true - np.average(y_true, axis=0, weights=sample_weight)) ** 2 ).sum(axis=0, dtype=np.float64) return _assemble_r2_explained_variance( numerator=numerator, denominator=denominator, n_outputs=y_true.shape[1], multioutput=multioutput, force_finite=force_finite, ) def max_error(y_true, y_pred): """ The max_error metric calculates the maximum residual error. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. Returns ------- max_error : float A positive floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import max_error >>> y_true = [3, 2, 7, 1] >>> y_pred = [4, 2, 7, 1] >>> max_error(y_true, y_pred) 1 """ y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in max_error") return np.max(np.abs(y_true - y_pred)) def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power): """Mean Tweedie deviance regression loss.""" p = power if p < 0: # 'Extreme stable', y any real number, y_pred > 0 dev = 2 * ( np.power(np.maximum(y_true, 0), 2 - p) / ((1 - p) * (2 - p)) - y_true * np.power(y_pred, 1 - p) / (1 - p) + np.power(y_pred, 2 - p) / (2 - p) ) elif p == 0: # Normal distribution, y and y_pred any real number dev = (y_true - y_pred) ** 2 elif p == 1: # Poisson distribution dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred) elif p == 2: # Gamma distribution dev = 2 * (np.log(y_pred / y_true) + y_true / y_pred - 1) else: dev = 2 * ( np.power(y_true, 2 - p) / ((1 - p) * (2 - p)) - y_true * np.power(y_pred, 1 - p) / (1 - p) + np.power(y_pred, 2 - p) / (2 - p) ) return np.average(dev, weights=sample_weight) def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): """Mean Tweedie deviance regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. power : float, default=0 Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_tweedie_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_tweedie_deviance(y_true, y_pred, power=1) 1.4260... """ y_type, y_true, y_pred, _ = _check_reg_targets( y_true, y_pred, None, dtype=[np.float64, np.float32] ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in mean_tweedie_deviance") check_consistent_length(y_true, y_pred, sample_weight) if sample_weight is not None: sample_weight = column_or_1d(sample_weight) sample_weight = sample_weight[:, np.newaxis] p = check_scalar( power, name="power", target_type=numbers.Real, ) message = f"Mean Tweedie deviance error with power={p} can only be used on " if p < 0: # 'Extreme stable', y any real number, y_pred > 0 if (y_pred <= 0).any(): raise ValueError(message + "strictly positive y_pred.") elif p == 0: # Normal, y and y_pred can be any real number pass elif 0 < p < 1: raise ValueError("Tweedie deviance is only defined for power<=0 and power>=1.") elif 1 <= p < 2: # Poisson and compound Poisson distribution, y >= 0, y_pred > 0 if (y_true < 0).any() or (y_pred <= 0).any(): raise ValueError(message + "non-negative y and strictly positive y_pred.") elif p >= 2: # Gamma and Extreme stable distribution, y and y_pred > 0 if (y_true <= 0).any() or (y_pred <= 0).any(): raise ValueError(message + "strictly positive y and y_pred.") else: # pragma: nocover # Unreachable statement raise ValueError return _mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=power ) def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None): """Mean Poisson deviance regression loss. Poisson deviance is equivalent to the Tweedie deviance with the power parameter `power=1`. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true >= 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_poisson_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_poisson_deviance(y_true, y_pred) 1.4260... """ return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=1) def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None): """Mean Gamma deviance regression loss. Gamma deviance is equivalent to the Tweedie deviance with the power parameter `power=2`. It is invariant to scaling of the target variable, and measures relative errors. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true > 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_gamma_deviance >>> y_true = [2, 0.5, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_gamma_deviance(y_true, y_pred) 1.0568... """ return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=2) def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0): """D^2 regression score function, fraction of Tweedie deviance explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical mean of `y_true` as constant prediction, disregarding the input features, gets a D^2 score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.0 Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. power : float, default=0 Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to r2_score. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. Returns ------- z : float or ndarray of floats The D^2 score. Notes ----- This is not a symmetric function. Like R^2, D^2 score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_tweedie_score >>> y_true = [0.5, 1, 2.5, 7] >>> y_pred = [1, 1, 5, 3.5] >>> d2_tweedie_score(y_true, y_pred) 0.285... >>> d2_tweedie_score(y_true, y_pred, power=1) 0.487... >>> d2_tweedie_score(y_true, y_pred, power=2) 0.630... >>> d2_tweedie_score(y_true, y_true, power=2) 1.0 """ y_type, y_true, y_pred, _ = _check_reg_targets( y_true, y_pred, None, dtype=[np.float64, np.float32] ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in d2_tweedie_score") if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) return float("nan") y_true, y_pred = np.squeeze(y_true), np.squeeze(y_pred) numerator = mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=power ) y_avg = np.average(y_true, weights=sample_weight) denominator = _mean_tweedie_deviance( y_true, y_avg, sample_weight=sample_weight, power=power ) return 1 - numerator / denominator def d2_pinball_score( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" ): """ :math:`D^2` regression score function, fraction of pinball loss explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical alpha-quantile of `y_true` as constant prediction, disregarding the input features, gets a :math:`D^2` score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.1 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. alpha : float, default=0.5 Slope of the pinball deviance. It determines the quantile level alpha for which the pinball deviance and also D2 are optimal. The default `alpha=0.5` is equivalent to `d2_absolute_error_score`. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. Returns ------- score : float or ndarray of floats The :math:`D^2` score with a pinball deviance or ndarray of scores if `multioutput='raw_values'`. Notes ----- Like :math:`R^2`, :math:`D^2` score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for a single point and will return a NaN value if n_samples is less than two. References ---------- .. [1] Eq. (7) of `Koenker, Roger; Machado, José A. F. (1999). "Goodness of Fit and Related Inference Processes for Quantile Regression" `_ .. [2] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_pinball_score >>> y_true = [1, 2, 3] >>> y_pred = [1, 3, 3] >>> d2_pinball_score(y_true, y_pred) 0.5 >>> d2_pinball_score(y_true, y_pred, alpha=0.9) 0.772... >>> d2_pinball_score(y_true, y_pred, alpha=0.1) -1.045... >>> d2_pinball_score(y_true, y_true, alpha=0.1) 1.0 """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) return float("nan") numerator = mean_pinball_loss( y_true, y_pred, sample_weight=sample_weight, alpha=alpha, multioutput="raw_values", ) if sample_weight is None: y_quantile = np.tile( np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1) ) else: sample_weight = _check_sample_weight(sample_weight, y_true) y_quantile = np.tile( _weighted_percentile( y_true, sample_weight=sample_weight, percentile=alpha * 100 ), (len(y_true), 1), ) denominator = mean_pinball_loss( y_true, y_quantile, sample_weight=sample_weight, alpha=alpha, multioutput="raw_values", ) nonzero_numerator = numerator != 0 nonzero_denominator = denominator != 0 valid_score = nonzero_numerator & nonzero_denominator output_scores = np.ones(y_true.shape[1]) output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0 if isinstance(multioutput, str): if multioutput == "raw_values": # return scores individually return output_scores elif multioutput == "uniform_average": # passing None as weights to np.average results in uniform mean avg_weights = None else: raise ValueError( "multioutput is expected to be 'raw_values' " "or 'uniform_average' but we got %r" " instead." % multioutput ) else: avg_weights = multioutput return np.average(output_scores, weights=avg_weights) def d2_absolute_error_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """ :math:`D^2` regression score function, \ fraction of absolute error explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical median of `y_true` as constant prediction, disregarding the input features, gets a :math:`D^2` score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.1 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. Returns ------- score : float or ndarray of floats The :math:`D^2` score with an absolute error deviance or ndarray of scores if 'multioutput' is 'raw_values'. Notes ----- Like :math:`R^2`, :math:`D^2` score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_absolute_error_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> d2_absolute_error_score(y_true, y_pred) 0.764... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> d2_absolute_error_score(y_true, y_pred, multioutput='uniform_average') 0.691... >>> d2_absolute_error_score(y_true, y_pred, multioutput='raw_values') array([0.8125 , 0.57142857]) >>> y_true = [1, 2, 3] >>> y_pred = [1, 2, 3] >>> d2_absolute_error_score(y_true, y_pred) 1.0 >>> y_true = [1, 2, 3] >>> y_pred = [2, 2, 2] >>> d2_absolute_error_score(y_true, y_pred) 0.0 >>> y_true = [1, 2, 3] >>> y_pred = [3, 2, 1] >>> d2_absolute_error_score(y_true, y_pred) -1.0 """ return d2_pinball_score( y_true, y_pred, sample_weight=sample_weight, alpha=0.5, multioutput=multioutput )