from collections import Counter

from ...utils._plotting import _BinaryClassifierCurveDisplayMixin
from .._ranking import average_precision_score, precision_recall_curve


class PrecisionRecallDisplay(_BinaryClassifierCurveDisplayMixin):
    """Precision Recall visualization.

    It is recommended to use
    :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator` or
    :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` to create
    a :class:`~sklearn.metrics.PrecisionRecallDisplay`. All parameters are
    stored as attributes.

    Read more in the :ref:`User Guide <visualizations>`.

    Parameters
    ----------
    precision : ndarray
        Precision values.

    recall : ndarray
        Recall values.

    average_precision : float, default=None
        Average precision. If None, the average precision is not shown.

    estimator_name : str, default=None
        Name of estimator. If None, then the estimator name is not shown.

    pos_label : int, float, bool or str, default=None
        The class considered as the positive class. If None, the class will not
        be shown in the legend.

        .. versionadded:: 0.24

    prevalence_pos_label : float, default=None
        The prevalence of the positive label. It is used for plotting the
        chance level line. If None, the chance level line will not be plotted
        even if `plot_chance_level` is set to True when plotting.

        .. versionadded:: 1.3

    Attributes
    ----------
    line_ : matplotlib Artist
        Precision recall curve.

    chance_level_ : matplotlib Artist or None
        The chance level line. It is `None` if the chance level is not plotted.

        .. versionadded:: 1.3

    ax_ : matplotlib Axes
        Axes with precision recall curve.

    figure_ : matplotlib Figure
        Figure containing the curve.

    See Also
    --------
    precision_recall_curve : Compute precision-recall pairs for different
        probability thresholds.
    PrecisionRecallDisplay.from_estimator : Plot Precision Recall Curve given
        a binary classifier.
    PrecisionRecallDisplay.from_predictions : Plot Precision Recall Curve
        using predictions from a binary classifier.

    Notes
    -----
    The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) in
    scikit-learn is computed without any interpolation. To be consistent with
    this metric, the precision-recall curve is plotted without any
    interpolation as well (step-wise style).

    You can change this style by passing the keyword argument
    `drawstyle="default"` in :meth:`plot`, :meth:`from_estimator`, or
    :meth:`from_predictions`. However, the curve will not be strictly
    consistent with the reported average precision.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.metrics import (precision_recall_curve,
    ...                              PrecisionRecallDisplay)
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.svm import SVC
    >>> X, y = make_classification(random_state=0)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
    ...                                                     random_state=0)
    >>> clf = SVC(random_state=0)
    >>> clf.fit(X_train, y_train)
    SVC(random_state=0)
    >>> predictions = clf.predict(X_test)
    >>> precision, recall, _ = precision_recall_curve(y_test, predictions)
    >>> disp = PrecisionRecallDisplay(precision=precision, recall=recall)
    >>> disp.plot()
    <...>
    >>> plt.show()
    """

    def __init__(
        self,
        precision,
        recall,
        *,
        average_precision=None,
        estimator_name=None,
        pos_label=None,
        prevalence_pos_label=None,
    ):
        self.estimator_name = estimator_name
        self.precision = precision
        self.recall = recall
        self.average_precision = average_precision
        self.pos_label = pos_label
        self.prevalence_pos_label = prevalence_pos_label

    def plot(
        self,
        ax=None,
        *,
        name=None,
        plot_chance_level=False,
        chance_level_kw=None,
        **kwargs,
    ):
        """Plot visualization.

        Extra keyword arguments will be passed to matplotlib's `plot`.

        Parameters
        ----------
        ax : Matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        name : str, default=None
            Name of precision recall curve for labeling. If `None`, use
            `estimator_name` if not `None`, otherwise no labeling is shown.

        plot_chance_level : bool, default=False
            Whether to plot the chance level. The chance level is the prevalence
            of the positive label computed from the data passed during
            the :meth:`from_estimator` or :meth:`from_predictions` call.

            .. versionadded:: 1.3

        chance_level_kw : dict, default=None
            Keyword arguments to be passed to matplotlib's `plot` for rendering
            the chance level line.

            .. versionadded:: 1.3

        **kwargs : dict
            Keyword arguments to be passed to matplotlib's `plot`.

        Returns
        -------
        display : :class:`~sklearn.metrics.PrecisionRecallDisplay`
            Object that stores computed values.

        Notes
        -----
        The average precision (cf. :func:`~sklearn.metrics.average_precision_score`)
        in scikit-learn is computed without any interpolation. To be consistent
        with this metric, the precision-recall curve is plotted without any
        interpolation as well (step-wise style).

        You can change this style by passing the keyword argument
        `drawstyle="default"`. However, the curve will not be strictly
        consistent with the reported average precision.
        """
        self.ax_, self.figure_, name = self._validate_plot_params(ax=ax, name=name)

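        # Use a step-wise line ("steps-post") so that the drawn curve stays
        # consistent with the uninterpolated average precision reported in the
        # legend (see the Notes section of the docstring).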
        line_kwargs = {"drawstyle": "steps-post"}
        if self.average_precision is not None and name is not None:
            line_kwargs["label"] = f"{name} (AP = {self.average_precision:0.2f})"
        elif self.average_precision is not None:
            line_kwargs["label"] = f"AP = {self.average_precision:0.2f}"
        elif name is not None:
            line_kwargs["label"] = name
        line_kwargs.update(**kwargs)

        (self.line_,) = self.ax_.plot(self.recall, self.precision, **line_kwargs)

        info_pos_label = (
            f" (Positive label: {self.pos_label})" if self.pos_label is not None else ""
        )

        xlabel = "Recall" + info_pos_label
        ylabel = "Precision" + info_pos_label
        self.ax_.set(
            xlabel=xlabel,
            xlim=(-0.01, 1.01),
            ylabel=ylabel,
            ylim=(-0.01, 1.01),
            aspect="equal",
        )

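        # The chance level is drawn as a horizontal line at the prevalence of
        # the positive class, i.e. the average precision achieved by chance.
        # It requires `prevalence_pos_label`, which `from_estimator` and
        # `from_predictions` set automatically from the data.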
        if plot_chance_level:
            if self.prevalence_pos_label is None:
                raise ValueError(
                    "You must provide prevalence_pos_label when constructing the "
                    "PrecisionRecallDisplay object in order to plot the chance "
                    "level line. Alternatively, you may use "
                    "PrecisionRecallDisplay.from_estimator or "
                    "PrecisionRecallDisplay.from_predictions "
                    "to automatically set prevalence_pos_label"
                )

            chance_level_line_kw = {
                "label": f"Chance level (AP = {self.prevalence_pos_label:0.2f})",
                "color": "k",
                "linestyle": "--",
            }
            if chance_level_kw is not None:
                chance_level_line_kw.update(chance_level_kw)

            (self.chance_level_,) = self.ax_.plot(
                (0, 1),
                (self.prevalence_pos_label, self.prevalence_pos_label),
                **chance_level_line_kw,
            )
        else:
            self.chance_level_ = None

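        # Only add a legend when there is at least one labelled artist
        # (the curve itself or the chance level line).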
if "label" in line_kwargs or plot_chance_level:
|
||
|
self.ax_.legend(loc="lower left")
|
||
|
|
||
|
return self
|
||
|
|
||
|
@classmethod
|
||
|
def from_estimator(
|
||
|
cls,
|
||
|
estimator,
|
||
|
X,
|
||
|
y,
|
||
|
*,
|
||
|
sample_weight=None,
|
||
|
pos_label=None,
|
||
|
drop_intermediate=False,
|
||
|
response_method="auto",
|
||
|
name=None,
|
||
|
ax=None,
|
||
|
plot_chance_level=False,
|
||
|
chance_level_kw=None,
|
||
|
**kwargs,
|
||
|
):
|
||
|
"""Plot precision-recall curve given an estimator and some data.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
estimator : estimator instance
|
||
|
Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
|
||
|
in which the last estimator is a classifier.
|
||
|
|
||
|
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||
|
Input values.
|
||
|
|
||
|
y : array-like of shape (n_samples,)
|
||
|
Target values.
|
||
|
|
||
|
sample_weight : array-like of shape (n_samples,), default=None
|
||
|
Sample weights.
|
||
|
|
||
|
pos_label : int, float, bool or str, default=None
|
||
|
The class considered as the positive class when computing the
|
||
|
precision and recall metrics. By default, `estimators.classes_[1]`
|
||
|
is considered as the positive class.
|
||
|
|
||
|
drop_intermediate : bool, default=False
|
||
|
Whether to drop some suboptimal thresholds which would not appear
|
||
|
on a plotted precision-recall curve. This is useful in order to
|
||
|
create lighter precision-recall curves.
|
||
|
|
||
|
.. versionadded:: 1.3
|
||
|
|
||
|
response_method : {'predict_proba', 'decision_function', 'auto'}, \
|
||
|
default='auto'
|
||
|
Specifies whether to use :term:`predict_proba` or
|
||
|
:term:`decision_function` as the target response. If set to 'auto',
|
||
|
:term:`predict_proba` is tried first and if it does not exist
|
||
|
:term:`decision_function` is tried next.
|
||
|
|
||
|
name : str, default=None
|
||
|
Name for labeling curve. If `None`, no name is used.
|
||
|
|
||
|
ax : matplotlib axes, default=None
|
||
|
Axes object to plot on. If `None`, a new figure and axes is created.
|
||
|
|
||
|
plot_chance_level : bool, default=False
|
||
|
Whether to plot the chance level. The chance level is the prevalence
|
||
|
of the positive label computed from the data passed during
|
||
|
:meth:`from_estimator` or :meth:`from_predictions` call.
|
||
|
|
||
|
.. versionadded:: 1.3
|
||
|
|
||
|
chance_level_kw : dict, default=None
|
||
|
Keyword arguments to be passed to matplotlib's `plot` for rendering
|
||
|
the chance level line.
|
||
|
|
||
|
.. versionadded:: 1.3
|
||
|
|
||
|
**kwargs : dict
|
||
|
Keyword arguments to be passed to matplotlib's `plot`.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
display : :class:`~sklearn.metrics.PrecisionRecallDisplay`
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
PrecisionRecallDisplay.from_predictions : Plot precision-recall curve
|
||
|
using estimated probabilities or output of decision function.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
The average precision (cf. :func:`~sklearn.metrics.average_precision_score`)
|
||
|
in scikit-learn is computed without any interpolation. To be consistent
|
||
|
with this metric, the precision-recall curve is plotted without any
|
||
|
interpolation as well (step-wise style).
|
||
|
|
||
|
You can change this style by passing the keyword argument
|
||
|
`drawstyle="default"`. However, the curve will not be strictly
|
||
|
consistent with the reported average precision.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> import matplotlib.pyplot as plt
|
||
|
>>> from sklearn.datasets import make_classification
|
||
|
>>> from sklearn.metrics import PrecisionRecallDisplay
|
||
|
>>> from sklearn.model_selection import train_test_split
|
||
|
>>> from sklearn.linear_model import LogisticRegression
|
||
|
>>> X, y = make_classification(random_state=0)
|
||
|
>>> X_train, X_test, y_train, y_test = train_test_split(
|
||
|
... X, y, random_state=0)
|
||
|
>>> clf = LogisticRegression()
|
||
|
>>> clf.fit(X_train, y_train)
|
||
|
LogisticRegression()
|
||
|
>>> PrecisionRecallDisplay.from_estimator(
|
||
|
... clf, X_test, y_test)
|
||
|
<...>
|
||
|
>>> plt.show()
|
||
|
"""
|
||
|
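        # Get the response values from the fitted estimator, using
        # `predict_proba` or `decision_function` according to `response_method`,
        # then delegate curve computation and plotting to `from_predictions`.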
        y_pred, pos_label, name = cls._validate_and_get_response_values(
            estimator,
            X,
            y,
            response_method=response_method,
            pos_label=pos_label,
            name=name,
        )

        return cls.from_predictions(
            y,
            y_pred,
            sample_weight=sample_weight,
            name=name,
            pos_label=pos_label,
            drop_intermediate=drop_intermediate,
            ax=ax,
            plot_chance_level=plot_chance_level,
            chance_level_kw=chance_level_kw,
            **kwargs,
        )

    @classmethod
    def from_predictions(
        cls,
        y_true,
        y_pred,
        *,
        sample_weight=None,
        pos_label=None,
        drop_intermediate=False,
        name=None,
        ax=None,
        plot_chance_level=False,
        chance_level_kw=None,
        **kwargs,
    ):
        """Plot precision-recall curve given binary class predictions.

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            True binary labels.

        y_pred : array-like of shape (n_samples,)
            Estimated probabilities or output of decision function.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        pos_label : int, float, bool or str, default=None
            The class considered as the positive class when computing the
            precision and recall metrics.

        drop_intermediate : bool, default=False
            Whether to drop some suboptimal thresholds which would not appear
            on a plotted precision-recall curve. This is useful in order to
            create lighter precision-recall curves.

            .. versionadded:: 1.3

        name : str, default=None
            Name for labeling curve. If `None`, name will be set to
            `"Classifier"`.

        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is created.

        plot_chance_level : bool, default=False
            Whether to plot the chance level. The chance level is the prevalence
            of the positive label computed from the data passed during
            the :meth:`from_estimator` or :meth:`from_predictions` call.

            .. versionadded:: 1.3

        chance_level_kw : dict, default=None
            Keyword arguments to be passed to matplotlib's `plot` for rendering
            the chance level line.

            .. versionadded:: 1.3

        **kwargs : dict
            Keyword arguments to be passed to matplotlib's `plot`.

        Returns
        -------
        display : :class:`~sklearn.metrics.PrecisionRecallDisplay`

        See Also
        --------
        PrecisionRecallDisplay.from_estimator : Plot precision-recall curve
            using an estimator.

        Notes
        -----
        The average precision (cf. :func:`~sklearn.metrics.average_precision_score`)
        in scikit-learn is computed without any interpolation. To be consistent
        with this metric, the precision-recall curve is plotted without any
        interpolation as well (step-wise style).

        You can change this style by passing the keyword argument
        `drawstyle="default"`. However, the curve will not be strictly
        consistent with the reported average precision.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import make_classification
        >>> from sklearn.metrics import PrecisionRecallDisplay
        >>> from sklearn.model_selection import train_test_split
        >>> from sklearn.linear_model import LogisticRegression
        >>> X, y = make_classification(random_state=0)
        >>> X_train, X_test, y_train, y_test = train_test_split(
        ...     X, y, random_state=0)
        >>> clf = LogisticRegression()
        >>> clf.fit(X_train, y_train)
        LogisticRegression()
        >>> y_pred = clf.predict_proba(X_test)[:, 1]
        >>> PrecisionRecallDisplay.from_predictions(
        ...     y_test, y_pred)
        <...>
        >>> plt.show()
        """
        pos_label, name = cls._validate_from_predictions_params(
            y_true, y_pred, sample_weight=sample_weight, pos_label=pos_label, name=name
        )

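        # Compute the precision-recall pairs and the average precision from the
        # provided scores, using the same `pos_label` and `sample_weight` so the
        # reported AP is consistent with the plotted curve.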
        precision, recall, _ = precision_recall_curve(
            y_true,
            y_pred,
            pos_label=pos_label,
            sample_weight=sample_weight,
            drop_intermediate=drop_intermediate,
        )
        average_precision = average_precision_score(
            y_true, y_pred, pos_label=pos_label, sample_weight=sample_weight
        )

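        # The prevalence of the positive class (its fraction among `y_true`) is
        # stored so that `plot` can draw the chance level line.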
        class_count = Counter(y_true)
        prevalence_pos_label = class_count[pos_label] / sum(class_count.values())

        viz = cls(
            precision=precision,
            recall=recall,
            average_precision=average_precision,
            estimator_name=name,
            pos_label=pos_label,
            prevalence_pos_label=prevalence_pos_label,
        )

        return viz.plot(
            ax=ax,
            name=name,
            plot_chance_level=plot_chance_level,
            chance_level_kw=chance_level_kw,
            **kwargs,
        )