1012 lines
34 KiB
Python
1012 lines
34 KiB
Python
|
"""
|
||
|
This module implements multioutput regression and classification.
|
||
|
|
||
|
The estimators provided in this module are meta-estimators: they require
|
||
|
a base estimator to be provided in their constructor. The meta-estimator
|
||
|
extends single output estimators to multioutput estimators.
|
||
|
"""
|
||
|
|
||
|
# Author: Tim Head <betatim@gmail.com>
|
||
|
# Author: Hugo Bowne-Anderson <hugobowne@gmail.com>
|
||
|
# Author: Chris Rivera <chris.richard.rivera@gmail.com>
|
||
|
# Author: Michael Williamson
|
||
|
# Author: James Ashton Nichols <james.ashton.nichols@gmail.com>
|
||
|
#
|
||
|
# License: BSD 3 clause
|
||
|
from numbers import Integral
|
||
|
|
||
|
import numpy as np
|
||
|
import scipy.sparse as sp
|
||
|
|
||
|
from abc import ABCMeta, abstractmethod
|
||
|
from .base import BaseEstimator, clone, MetaEstimatorMixin
|
||
|
from .base import RegressorMixin, ClassifierMixin, is_classifier
|
||
|
from .model_selection import cross_val_predict
|
||
|
from .utils import check_random_state, _print_elapsed_time
|
||
|
from .utils.metaestimators import available_if
|
||
|
from .utils.multiclass import check_classification_targets
|
||
|
from .utils.validation import (
|
||
|
check_is_fitted,
|
||
|
has_fit_parameter,
|
||
|
_check_fit_params,
|
||
|
)
|
||
|
from .utils.parallel import delayed, Parallel
|
||
|
from .utils._param_validation import HasMethods, StrOptions
|
||
|
|
||
|
__all__ = [
|
||
|
"MultiOutputRegressor",
|
||
|
"MultiOutputClassifier",
|
||
|
"ClassifierChain",
|
||
|
"RegressorChain",
|
||
|
]
|
||
|
|
||
|
|
||
|
def _fit_estimator(estimator, X, y, sample_weight=None, **fit_params):
    """Clone `estimator` and fit the clone on one target.

    Parameters
    ----------
    estimator : estimator object
        Template estimator. It is cloned; the clone is what gets fitted.

    X : object
        Input data, forwarded unchanged to ``fit``.

    y : object
        Target for this estimator, forwarded unchanged to ``fit``.

    sample_weight : object, default=None
        Forwarded as the ``sample_weight`` keyword only when it is not
        `None`; otherwise the keyword is omitted from the call entirely.

    **fit_params : dict
        Additional keyword arguments forwarded to ``fit``.

    Returns
    -------
    fitted : estimator object
        The fitted clone.
    """
    fitted = clone(estimator)
    if sample_weight is None:
        fitted.fit(X, y, **fit_params)
    else:
        fitted.fit(X, y, sample_weight=sample_weight, **fit_params)
    return fitted
|
||
|
|
||
|
|
||
|
def _partial_fit_estimator(
    estimator, X, y, classes=None, sample_weight=None, first_time=True
):
    """Run one ``partial_fit`` step on a single-output estimator.

    Parameters
    ----------
    estimator : estimator object
        Estimator to update. When `first_time` is true it is cloned first
        and the clone is updated instead.

    X : object
        Input data, forwarded unchanged to ``partial_fit``.

    y : object
        Target for this estimator, forwarded unchanged to ``partial_fit``.

    classes : object, default=None
        Forwarded as the ``classes`` keyword only when it is not `None`.

    sample_weight : object, default=None
        Forwarded as the ``sample_weight`` keyword only when it is not
        `None`.

    first_time : bool, default=True
        Whether to clone `estimator` before updating it.

    Returns
    -------
    estimator : estimator object
        The estimator that received the ``partial_fit`` call.
    """
    if first_time:
        estimator = clone(estimator)

    # Only pass the optional keywords that were actually supplied, so the
    # underlying ``partial_fit`` never sees an explicit ``None`` for them.
    optional_kwargs = {}
    if classes is not None:
        optional_kwargs["classes"] = classes
    if sample_weight is not None:
        optional_kwargs["sample_weight"] = sample_weight

    estimator.partial_fit(X, y, **optional_kwargs)
    return estimator
|
||
|
|
||
|
|
||
|
def _available_if_estimator_has(attr):
    """Build a decorator exposing a method only if the wrapped estimator has `attr`.

    The check passes when `self.estimator` has `attr`, or otherwise when
    every entry of `self.estimators_` has it.

    Helper for the multi-output meta-estimators.
    """

    def _check(self):
        if hasattr(self.estimator, attr):
            return True
        # Fall back to the fitted sub-estimators; accessing `estimators_`
        # on an unfitted instance raises AttributeError from here.
        return all(hasattr(est, attr) for est in self.estimators_)

    return available_if(_check)
|
||
|
|
||
|
|
||
|
class _MultiOutputEstimator(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):
    """Base class that fits one copy of `estimator` per column of `y`."""

    _parameter_constraints: dict = {
        "estimator": [HasMethods(["fit", "predict"])],
        "n_jobs": [Integral, None],
    }

    @abstractmethod
    def __init__(self, estimator, *, n_jobs=None):
        self.estimator = estimator
        self.n_jobs = n_jobs

    @_available_if_estimator_has("partial_fit")
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """Incrementally fit a separate model for each class output.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
            Multi-output targets.

        classes : list of ndarray of shape (n_outputs,), default=None
            Each array is unique classes for one output in str/int.
            Can be obtained via
            ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y`
            is the target matrix of the entire dataset.
            This argument is required for the first call to partial_fit
            and can be omitted in the subsequent calls.
            Note that `y` doesn't need to contain all labels in `classes`.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If `None`, then samples are equally weighted.
            Only supported if the underlying regressor supports sample
            weights.

        Returns
        -------
        self : object
            Returns a fitted instance.
        """
        # The absence of `estimators_` marks the very first call.
        is_first_call = not hasattr(self, "estimators_")
        if is_first_call:
            self._validate_params()

        y = self._validate_data(X="no_validation", y=y, multi_output=True)
        if y.ndim == 1:
            raise ValueError(
                "y must have at least two dimensions for "
                "multi-output regression but has only one."
            )

        if sample_weight is not None:
            if not has_fit_parameter(self.estimator, "sample_weight"):
                raise ValueError(
                    "Underlying estimator does not support sample weights."
                )

        n_outputs = y.shape[1]
        # First call: every output starts from the unfitted template (which
        # the helper clones). Later calls: continue from the stored models.
        if is_first_call:
            starting_points = [self.estimator] * n_outputs
        else:
            starting_points = self.estimators_

        jobs = (
            delayed(_partial_fit_estimator)(
                starting_points[idx],
                X,
                y[:, idx],
                None if classes is None else classes[idx],
                sample_weight,
                is_first_call,
            )
            for idx in range(n_outputs)
        )
        self.estimators_ = Parallel(n_jobs=self.n_jobs)(jobs)

        if is_first_call:
            head = self.estimators_[0]
            if hasattr(head, "n_features_in_"):
                self.n_features_in_ = head.n_features_in_
            if hasattr(head, "feature_names_in_"):
                self.feature_names_in_ = head.feature_names_in_

        return self

    def fit(self, X, y, sample_weight=None, **fit_params):
        """Fit the model to data, separately for each output variable.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
            Multi-output targets. An indicator matrix turns on multilabel
            estimation.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If `None`, then samples are equally weighted.
            Only supported if the underlying regressor supports sample
            weights.

        **fit_params : dict of string -> object
            Parameters passed to the ``estimator.fit`` method of each step.

            .. versionadded:: 0.23

        Returns
        -------
        self : object
            Returns a fitted instance.
        """
        self._validate_params()

        if not hasattr(self.estimator, "fit"):
            raise ValueError("The base estimator should implement a fit method")

        y = self._validate_data(X="no_validation", y=y, multi_output=True)

        if is_classifier(self):
            check_classification_targets(y)

        if y.ndim == 1:
            raise ValueError(
                "y must have at least two dimensions for "
                "multi-output regression but has only one."
            )

        if sample_weight is not None:
            if not has_fit_parameter(self.estimator, "sample_weight"):
                raise ValueError(
                    "Underlying estimator does not support sample weights."
                )

        checked_fit_params = _check_fit_params(X, fit_params)

        jobs = (
            delayed(_fit_estimator)(
                self.estimator, X, y[:, idx], sample_weight, **checked_fit_params
            )
            for idx in range(y.shape[1])
        )
        self.estimators_ = Parallel(n_jobs=self.n_jobs)(jobs)

        # Mirror the feature metadata of the first fitted sub-estimator.
        head = self.estimators_[0]
        if hasattr(head, "n_features_in_"):
            self.n_features_in_ = head.n_features_in_
        if hasattr(head, "feature_names_in_"):
            self.feature_names_in_ = head.feature_names_in_

        return self

    def predict(self, X):
        """Predict multi-output variable using model for each target variable.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
            Multi-output targets predicted across multiple predictors.
            Note: Separate models are generated for each predictor.
        """
        check_is_fitted(self)
        if not hasattr(self.estimators_[0], "predict"):
            raise ValueError("The base estimator should implement a predict method")

        per_output = Parallel(n_jobs=self.n_jobs)(
            delayed(est.predict)(X) for est in self.estimators_
        )

        # One row per estimator was collected; transpose so that each
        # estimator's predictions form a column.
        return np.asarray(per_output).T

    def _more_tags(self):
        return {"multioutput_only": True}
|
||
|
|
||
|
|
||
|
class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):
    """Multi target regression.

    This strategy consists of fitting one regressor per target. This is a
    simple strategy for extending regressors that do not natively support
    multi-target regression.

    .. versionadded:: 0.18

    Parameters
    ----------
    estimator : estimator object
        An estimator object implementing :term:`fit` and :term:`predict`.

    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel.
        :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported
        by the passed estimator) will be parallelized for each target.

        When individual estimators are fast to train or predict,
        using ``n_jobs > 1`` can result in slower performance due
        to the parallelism overhead.

        ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all available processes / threads.
        See :term:`Glossary <n_jobs>` for more details.

        .. versionchanged:: 0.20
            `n_jobs` default changed from `1` to `None`.

    Attributes
    ----------
    estimators_ : list of ``n_output`` estimators
        Estimators used for predictions.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying `estimator` exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimators expose such an attribute when fit.

        .. versionadded:: 1.0

    See Also
    --------
    RegressorChain : A multi-label model that arranges regressions into a
        chain.
    MultiOutputClassifier : Classifies each output independently rather than
        chaining.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import load_linnerud
    >>> from sklearn.multioutput import MultiOutputRegressor
    >>> from sklearn.linear_model import Ridge
    >>> X, y = load_linnerud(return_X_y=True)
    >>> regr = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
    >>> regr.predict(X[[0]])
    array([[176..., 35..., 57...]])
    """

    def __init__(self, estimator, *, n_jobs=None):
        super().__init__(estimator, n_jobs=n_jobs)

    @_available_if_estimator_has("partial_fit")
    def partial_fit(self, X, y, sample_weight=None):
        """Incrementally fit the model to data, for each output variable.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
            Multi-output targets.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If `None`, then samples are equally weighted.
            Only supported if the underlying regressor supports sample
            weights.

        Returns
        -------
        self : object
            Returns a fitted instance.
        """
        # The regressor variant has no `classes` argument, so only the
        # sample weights are forwarded to the shared implementation.
        super().partial_fit(X, y, sample_weight=sample_weight)
        # Return the instance explicitly: the documented contract is
        # "returns self", which also makes ``est.partial_fit(...).predict(...)``
        # chaining work.
        return self
|
||
|
|
||
|
|
||
|
class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator):
    """Multi target classification.

    This strategy consists of fitting one classifier per target. This is a
    simple strategy for extending classifiers that do not natively support
    multi-target classification.

    Parameters
    ----------
    estimator : estimator object
        An estimator object implementing :term:`fit` and :term:`predict`.
        A :term:`predict_proba` method will be exposed only if `estimator`
        implements it.

    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel.
        :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported
        by the passed estimator) will be parallelized for each target.

        When individual estimators are fast to train or predict,
        using ``n_jobs > 1`` can result in slower performance due
        to the parallelism overhead.

        ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all available processes / threads.
        See :term:`Glossary <n_jobs>` for more details.

        .. versionchanged:: 0.20
            `n_jobs` default changed from `1` to `None`.

    Attributes
    ----------
    classes_ : list of ndarray
        Class labels, one array per output (taken from the ``classes_``
        attribute of each fitted estimator). Set by :meth:`fit`.

    estimators_ : list of ``n_output`` estimators
        Estimators used for predictions.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying `estimator` exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimators expose such an attribute when fit.

        .. versionadded:: 1.0

    See Also
    --------
    ClassifierChain : A multi-label model that arranges binary classifiers
        into a chain.
    MultiOutputRegressor : Fits one regressor per target variable.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_multilabel_classification
    >>> from sklearn.multioutput import MultiOutputClassifier
    >>> from sklearn.linear_model import LogisticRegression
    >>> X, y = make_multilabel_classification(n_classes=3, random_state=0)
    >>> clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)
    >>> clf.predict(X[-2:])
    array([[1, 1, 1],
           [1, 0, 1]])
    """

    def __init__(self, estimator, *, n_jobs=None):
        super().__init__(estimator, n_jobs=n_jobs)

    def fit(self, X, Y, sample_weight=None, **fit_params):
        """Fit the model to data matrix X and targets Y.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        Y : array-like of shape (n_samples, n_classes)
            The target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If `None`, then samples are equally weighted.
            Only supported if the underlying classifier supports sample
            weights.

        **fit_params : dict of string -> object
            Parameters passed to the ``estimator.fit`` method of each step.

            .. versionadded:: 0.23

        Returns
        -------
        self : object
            Returns a fitted instance.
        """
        super().fit(X, Y, sample_weight, **fit_params)
        self.classes_ = [estimator.classes_ for estimator in self.estimators_]
        return self

    def _check_predict_proba(self):
        """Availability check for :meth:`predict_proba`.

        Returns True when every fitted estimator (or, before fitting, the
        template `estimator`) has ``predict_proba``; otherwise the attribute
        lookup raises ``AttributeError``.
        """
        if hasattr(self, "estimators_"):
            # raise an AttributeError if `predict_proba` does not exist for
            # each estimator
            for est in self.estimators_:
                getattr(est, "predict_proba")
            return True
        # raise an AttributeError if `predict_proba` does not exist for the
        # unfitted estimator
        getattr(self.estimator, "predict_proba")
        return True

    @available_if(_check_predict_proba)
    def predict_proba(self, X):
        """Return prediction probabilities for each class of each output.

        This method is only available (accessing it raises an
        ``AttributeError`` otherwise) if the estimators implement
        ``predict_proba``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        p : list of arrays, one per output
            The class probabilities of the input samples as returned by each
            estimator's ``predict_proba``. The order of the classes
            corresponds to that in the attribute :term:`classes_`.

            .. versionchanged:: 0.19
                This function now returns a list of arrays where the length of
                the list is ``n_outputs``, and each array is (``n_samples``,
                ``n_classes``) for that particular output.
        """
        check_is_fitted(self)
        results = [estimator.predict_proba(X) for estimator in self.estimators_]
        return results

    def score(self, X, y):
        """Return the mean accuracy on the given test data and labels.

        A sample counts as correct only when all of its outputs are
        predicted correctly.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : array-like of shape (n_samples, n_outputs)
            True values for X.

        Returns
        -------
        scores : float
            Mean accuracy of predicted target versus true target.

        Raises
        ------
        ValueError
            If `y` is one-dimensional, or if its number of columns differs
            from the number of fitted estimators.
        """
        check_is_fitted(self)
        n_outputs_ = len(self.estimators_)
        # `y` is documented as array-like: plain sequences have no `ndim` /
        # `shape`, so convert them. Objects that already expose `ndim`
        # (ndarrays, DataFrames, ...) are left untouched.
        if not hasattr(y, "ndim"):
            y = np.asarray(y)
        if y.ndim == 1:
            raise ValueError(
                "y must have at least two dimensions for "
                "multi target classification but has only one"
            )
        if y.shape[1] != n_outputs_:
            raise ValueError(
                "The number of outputs of Y for fit {0} and"
                " score {1} should be same".format(n_outputs_, y.shape[1])
            )
        y_pred = self.predict(X)
        return np.mean(np.all(y == y_pred, axis=1))

    def _more_tags(self):
        # FIXME
        return {"_skip_test": True}
|
||
|
|
||
|
|
||
|
def _available_if_base_estimator_has(attr):
    """Build a decorator exposing a method only if the base estimator has `attr`.

    The check passes when `self.base_estimator` has `attr`, or otherwise
    when every entry of `self.estimators_` has it.

    Helper for Chain implementations.
    """

    def _check(self):
        if hasattr(self.base_estimator, attr):
            return True
        # Fall back to the fitted chain members; accessing `estimators_`
        # on an unfitted instance raises AttributeError from here.
        return all(hasattr(est, attr) for est in self.estimators_)

    return available_if(_check)
|
||
|
|
||
|
|
||
|
class _BaseChain(BaseEstimator, metaclass=ABCMeta):
    """Shared fit/predict machinery for chained multi-output estimators."""

    _parameter_constraints: dict = {
        "base_estimator": [HasMethods(["fit", "predict"])],
        "order": ["array-like", StrOptions({"random"}), None],
        "cv": ["cv_object", StrOptions({"prefit"})],
        "random_state": ["random_state"],
        "verbose": ["boolean"],
    }

    def __init__(
        self, base_estimator, *, order=None, cv=None, random_state=None, verbose=False
    ):
        self.base_estimator = base_estimator
        self.order = order
        self.cv = cv
        self.random_state = random_state
        self.verbose = verbose

    def _log_message(self, *, estimator_idx, n_estimators, processing_msg):
        """Return the progress message for one chain step, or None if not verbose."""
        if self.verbose:
            return f"({estimator_idx} of {n_estimators}) {processing_msg}"
        return None

    @abstractmethod
    def fit(self, X, Y, **fit_params):
        """Fit the model to data matrix X and targets Y.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        Y : array-like of shape (n_samples, n_classes)
            The target values.

        **fit_params : dict of string -> object
            Parameters passed to the `fit` method of each step.

            .. versionadded:: 0.23

        Returns
        -------
        self : object
            Returns a fitted instance.
        """
        X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)

        rng = check_random_state(self.random_state)
        n_outputs = Y.shape[1]

        # Resolve the chain order. `order_` is assigned before validation so
        # that it is set even when the explicit order turns out invalid.
        requested = self.order
        if isinstance(requested, tuple):
            requested = np.array(requested)
        self.order_ = requested

        if requested is None:
            self.order_ = np.array(range(n_outputs))
        elif isinstance(requested, str):
            if requested == "random":
                self.order_ = rng.permutation(n_outputs)
        elif sorted(requested) != list(range(n_outputs)):
            raise ValueError("invalid order")

        self.estimators_ = [clone(self.base_estimator) for _ in range(n_outputs)]

        n_samples, n_features = X.shape
        x_is_sparse = sp.issparse(X)
        use_true_targets = self.cv is None

        # Columns appended to X: the true targets (in chain order) when no
        # cross-validation is requested, otherwise zero placeholders that
        # are filled with cross-validated predictions during the loop below.
        if use_true_targets:
            chain_block = Y[:, self.order_]
        elif x_is_sparse:
            chain_block = sp.lil_matrix((n_samples, n_outputs))
        else:
            chain_block = np.zeros((n_samples, n_outputs))

        if x_is_sparse:
            X_aug = sp.hstack((X, chain_block), format="lil")
            if use_true_targets:
                # No column assignment will follow, so switch to CSR.
                X_aug = X_aug.tocsr()
        else:
            X_aug = np.hstack((X, chain_block))

        del chain_block

        n_estimators = len(self.estimators_)
        for chain_idx, estimator in enumerate(self.estimators_):
            target_col = self.order_[chain_idx]
            message = self._log_message(
                estimator_idx=chain_idx + 1,
                n_estimators=n_estimators,
                processing_msg=f"Processing order {target_col}",
            )
            y = Y[:, target_col]
            # Link `chain_idx` sees the original features plus the columns
            # of the links that precede it.
            col_idx = n_features + chain_idx

            with _print_elapsed_time("Chain", message):
                estimator.fit(X_aug[:, :col_idx], y, **fit_params)

            if self.cv is not None and chain_idx < n_estimators - 1:
                cv_result = cross_val_predict(
                    self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv
                )
                if sp.issparse(X_aug):
                    X_aug[:, col_idx] = np.expand_dims(cv_result, 1)
                else:
                    X_aug[:, col_idx] = cv_result

        return self

    def predict(self, X):
        """Predict on the data matrix X using the fitted chain.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        Y_pred : array-like of shape (n_samples, n_classes)
            The predicted values.
        """
        check_is_fitted(self)
        X = self._validate_data(X, accept_sparse=True, reset=False)

        x_is_sparse = sp.issparse(X)
        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))

        for chain_idx, estimator in enumerate(self.estimators_):
            if x_is_sparse and chain_idx == 0:
                # Nothing to append yet for the first link of a sparse input.
                X_aug = X
            else:
                stack = sp.hstack if x_is_sparse else np.hstack
                X_aug = stack((X, Y_pred_chain[:, :chain_idx]))
            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)

        # Undo the chain permutation so columns line up with the original Y.
        inv_order = np.empty_like(self.order_)
        inv_order[self.order_] = np.arange(len(self.order_))

        return Y_pred_chain[:, inv_order]
|
||
|
|
||
|
|
||
|
class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain):
    """A multi-label model that arranges binary classifiers into a chain.

    Each model makes a prediction in the order specified by the chain using
    all of the available features provided to the model plus the predictions
    of models that are earlier in the chain.

    Read more in the :ref:`User Guide <classifierchain>`.

    .. versionadded:: 0.19

    Parameters
    ----------
    base_estimator : estimator
        The base estimator from which the classifier chain is built.

    order : array-like of shape (n_outputs,) or 'random', default=None
        If `None`, the order will be determined by the order of columns in
        the label matrix Y.::

            order = [0, 1, 2, ..., Y.shape[1] - 1]

        The order of the chain can be explicitly set by providing a list of
        integers. For example, for a chain of length 5.::

            order = [1, 3, 2, 4, 0]

        means that the first model in the chain will make predictions for
        column 1 in the Y matrix, the second model will make predictions
        for column 3, etc.

        If order is `random` a random ordering will be used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines whether to use cross validated predictions or true
        labels for the results of previous estimators in the chain.
        Possible inputs for cv are:

        - None, to use true labels when fitting,
        - integer, to specify the number of folds in a (Stratified)KFold,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

    random_state : int, RandomState instance or None, optional (default=None)
        If ``order='random'``, determines random number generation for the
        chain order.
        In addition, it controls the random seed given at each `base_estimator`
        at each chaining iteration. Thus, it is only used when `base_estimator`
        exposes a `random_state`.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    verbose : bool, default=False
        If True, chain progress is output as each model is completed.

        .. versionadded:: 1.2

    Attributes
    ----------
    classes_ : list
        A list of arrays of length ``len(estimators_)`` containing the
        class labels for each estimator in the chain.

    estimators_ : list
        A list of clones of base_estimator.

    order_ : list
        The order of labels in the classifier chain.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying `base_estimator` exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    RegressorChain : Equivalent for regression.
    MultiOutputClassifier : Classifies each output independently rather than
        chaining.

    References
    ----------
    Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, "Classifier
    Chains for Multi-label Classification", 2009.

    Examples
    --------
    >>> from sklearn.datasets import make_multilabel_classification
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.multioutput import ClassifierChain
    >>> X, Y = make_multilabel_classification(
    ...    n_samples=12, n_classes=3, random_state=0
    ... )
    >>> X_train, X_test, Y_train, Y_test = train_test_split(
    ...    X, Y, random_state=0
    ... )
    >>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)
    >>> chain = ClassifierChain(base_lr, order='random', random_state=0)
    >>> chain.fit(X_train, Y_train).predict(X_test)
    array([[1., 1., 0.],
           [1., 0., 0.],
           [0., 1., 0.]])
    >>> chain.predict_proba(X_test)
    array([[0.8387..., 0.9431..., 0.4576...],
           [0.8878..., 0.3684..., 0.2640...],
           [0.0321..., 0.9935..., 0.0625...]])
    """

    def fit(self, X, Y):
        """Fit the model to data matrix X and targets Y.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        Y : array-like of shape (n_samples, n_classes)
            The target values.

        Returns
        -------
        self : object
            Class instance.
        """
        self._validate_params()

        super().fit(X, Y)
        self.classes_ = [estimator.classes_ for estimator in self.estimators_]
        return self

    @_available_if_base_estimator_has("predict_proba")
    def predict_proba(self, X):
        """Predict probability estimates.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        Y_prob : array-like of shape (n_samples, n_classes)
            The predicted probabilities.
        """
        # Same guard as `predict`: fail with NotFittedError instead of a
        # bare AttributeError on `estimators_` when called before `fit`.
        check_is_fitted(self)
        X = self._validate_data(X, accept_sparse=True, reset=False)
        Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_)))
        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))
        for chain_idx, estimator in enumerate(self.estimators_):
            previous_predictions = Y_pred_chain[:, :chain_idx]
            if sp.issparse(X):
                X_aug = sp.hstack((X, previous_predictions))
            else:
                X_aug = np.hstack((X, previous_predictions))
            # Column 1 of `predict_proba` is reported for each link, while
            # the hard predictions are what gets fed to later links.
            Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1]
            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)

        # Undo the chain permutation so columns line up with the original Y.
        inv_order = np.empty_like(self.order_)
        inv_order[self.order_] = np.arange(len(self.order_))
        Y_prob = Y_prob_chain[:, inv_order]

        return Y_prob

    @_available_if_base_estimator_has("decision_function")
    def decision_function(self, X):
        """Evaluate the decision_function of the models in the chain.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        Y_decision : array-like of shape (n_samples, n_classes)
            Returns the decision function of the sample for each model
            in the chain.
        """
        # Same guard as `predict`: fail with NotFittedError instead of a
        # bare AttributeError on `estimators_` when called before `fit`.
        check_is_fitted(self)
        X = self._validate_data(X, accept_sparse=True, reset=False)
        Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_)))
        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))
        for chain_idx, estimator in enumerate(self.estimators_):
            previous_predictions = Y_pred_chain[:, :chain_idx]
            if sp.issparse(X):
                X_aug = sp.hstack((X, previous_predictions))
            else:
                X_aug = np.hstack((X, previous_predictions))
            Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug)
            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)

        # Undo the chain permutation so columns line up with the original Y.
        inv_order = np.empty_like(self.order_)
        inv_order[self.order_] = np.arange(len(self.order_))
        Y_decision = Y_decision_chain[:, inv_order]

        return Y_decision

    def _more_tags(self):
        return {"_skip_test": True, "multioutput_only": True}
|
||
|
|
||
|
|
||
|
class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain):
    """A multi-label model that arranges regressions into a chain.

    Each model makes a prediction in the order specified by the chain using
    all of the available features provided to the model plus the predictions
    of models that are earlier in the chain.

    Read more in the :ref:`User Guide <regressorchain>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    base_estimator : estimator
        The base estimator from which the regressor chain is built.

    order : array-like of shape (n_outputs,) or 'random', default=None
        If `None`, the order will be determined by the order of columns in
        the label matrix Y.::

            order = [0, 1, 2, ..., Y.shape[1] - 1]

        The order of the chain can be explicitly set by providing a list of
        integers. For example, for a chain of length 5.::

            order = [1, 3, 2, 4, 0]

        means that the first model in the chain will make predictions for
        column 1 in the Y matrix, the second model will make predictions
        for column 3, etc.

        If order is 'random' a random ordering will be used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines whether to use cross validated predictions or true
        labels for the results of previous estimators in the chain.
        Possible inputs for cv are:

        - None, to use true labels when fitting,
        - integer, to specify the number of folds in a (Stratified)KFold,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

    random_state : int, RandomState instance or None, optional (default=None)
        If ``order='random'``, determines random number generation for the
        chain order.
        In addition, it controls the random seed given at each `base_estimator`
        at each chaining iteration. Thus, it is only used when `base_estimator`
        exposes a `random_state`.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    verbose : bool, default=False
        If True, chain progress is output as each model is completed.

        .. versionadded:: 1.2

    Attributes
    ----------
    estimators_ : list
        A list of clones of base_estimator.

    order_ : list
        The order of the targets in the chain.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying `base_estimator` exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    ClassifierChain : Equivalent for classification.
    MultiOutputRegressor : Learns each output independently rather than
        chaining.

    Examples
    --------
    >>> from sklearn.multioutput import RegressorChain
    >>> from sklearn.linear_model import LogisticRegression
    >>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')
    >>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]
    >>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)
    >>> chain.predict(X)
    array([[0., 2.],
           [1., 1.],
           [2., 0.]])
    """

    def fit(self, X, Y, **fit_params):
        """Fit the model to data matrix X and targets Y.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.

        Y : array-like of shape (n_samples, n_classes)
            The target values.

        **fit_params : dict of string -> object
            Parameters passed to the `fit` method at each step
            of the regressor chain.

            .. versionadded:: 0.23

        Returns
        -------
        self : object
            Returns a fitted instance.
        """
        # Constructor arguments are validated here; the shared chain
        # fitting logic lives in the parent class.
        self._validate_params()
        _BaseChain.fit(self, X, Y, **fit_params)
        return self

    def _more_tags(self):
        tags = {"multioutput_only": True}
        return tags
|