353 lines
12 KiB
Python
353 lines
12 KiB
Python
# Authors: Andreas Mueller <andreas.mueller@columbia.edu>
#          Guillaume Lemaitre <guillaume.lemaitre@inria.fr>
# License: BSD 3 clause
|
|
|
|
import warnings
|
|
|
|
import numpy as np
|
|
|
|
from ..base import BaseEstimator, RegressorMixin, _fit_context, clone
|
|
from ..exceptions import NotFittedError
|
|
from ..preprocessing import FunctionTransformer
|
|
from ..utils import _safe_indexing, check_array
|
|
from ..utils._param_validation import HasMethods
|
|
from ..utils._tags import _safe_tags
|
|
from ..utils.metadata_routing import (
|
|
_raise_for_unsupported_routing,
|
|
_RoutingNotSupportedMixin,
|
|
)
|
|
from ..utils.validation import check_is_fitted
|
|
|
|
__all__ = ["TransformedTargetRegressor"]
|
|
|
|
|
|
class TransformedTargetRegressor(
    _RoutingNotSupportedMixin, RegressorMixin, BaseEstimator
):
    """Meta-estimator to regress on a transformed target.

    Useful for applying a non-linear transformation to the target `y` in
    regression problems. This transformation can be given as a Transformer
    such as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a
    function and its inverse such as `np.log` and `np.exp`.

    The computation during :meth:`fit` is::

        regressor.fit(X, func(y))

    or::

        regressor.fit(X, transformer.transform(y))

    The computation during :meth:`predict` is::

        inverse_func(regressor.predict(X))

    or::

        transformer.inverse_transform(regressor.predict(X))

    Read more in the :ref:`User Guide <transformed_target_regressor>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    regressor : object, default=None
        Regressor object such as derived from
        :class:`~sklearn.base.RegressorMixin`. This regressor will
        automatically be cloned each time prior to fitting. If `regressor is
        None`, :class:`~sklearn.linear_model.LinearRegression` is created and
        used.

    transformer : object, default=None
        Estimator object such as derived from
        :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time
        as `func` and `inverse_func`. If `transformer is None` as well as
        `func` and `inverse_func`, the transformer will be an identity
        transformer. Note that the transformer will be cloned during fitting.
        Also, the transformer is restricting `y` to be a numpy array.

    func : function, default=None
        Function to apply to `y` before passing to :meth:`fit`. Cannot be set
        at the same time as `transformer`. If `func is None`, the function
        used will be the identity function. If `func` is set, `inverse_func`
        also needs to be provided. The function needs to return a
        2-dimensional array.

    inverse_func : function, default=None
        Function to apply to the prediction of the regressor. Cannot be set at
        the same time as `transformer`. The inverse function is used to return
        predictions to the same space of the original training labels. If
        `inverse_func` is set, `func` also needs to be provided. The inverse
        function needs to return a 2-dimensional array.

    check_inverse : bool, default=True
        Whether to check that `transform` followed by `inverse_transform`
        or `func` followed by `inverse_func` leads to the original targets.

    Attributes
    ----------
    regressor_ : object
        Fitted regressor.

    transformer_ : object
        Transformer used in :meth:`fit` and :meth:`predict`.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying regressor exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    sklearn.preprocessing.FunctionTransformer : Construct a transformer from an
        arbitrary callable.

    Notes
    -----
    Internally, the target `y` is always converted into a 2-dimensional array
    to be used by scikit-learn transformers. At the time of prediction, the
    output will be reshaped to have the same number of dimensions as `y`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.compose import TransformedTargetRegressor
    >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),
    ...                                 func=np.log, inverse_func=np.exp)
    >>> X = np.arange(4).reshape(-1, 1)
    >>> y = np.exp(2 * X).ravel()
    >>> tt.fit(X, y)
    TransformedTargetRegressor(...)
    >>> tt.score(X, y)
    1.0
    >>> tt.regressor_.coef_
    array([2.])

    For a more detailed example use case refer to
    :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py`.
    """

    _parameter_constraints: dict = {
        "regressor": [HasMethods(["fit", "predict"]), None],
        "transformer": [HasMethods("transform"), None],
        "func": [callable, None],
        "inverse_func": [callable, None],
        "check_inverse": ["boolean"],
    }

    def __init__(
        self,
        regressor=None,
        *,
        transformer=None,
        func=None,
        inverse_func=None,
        check_inverse=True,
    ):
        self.regressor = regressor
        self.transformer = transformer
        self.func = func
        self.inverse_func = inverse_func
        self.check_inverse = check_inverse

    def _fit_transformer(self, y):
        """Check transformer and fit transformer.

        Create the default transformer, fit it and make additional inverse
        check on a subset (optional).

        Parameters
        ----------
        y : ndarray
            Target values as passed by :meth:`fit`, i.e. already validated
            and reshaped to be at least 2-dimensional.

        Raises
        ------
        ValueError
            If `transformer` is set together with `func` or `inverse_func`,
            or if only one of `func` and `inverse_func` is provided.
        """
        if self.transformer is not None and (
            self.func is not None or self.inverse_func is not None
        ):
            raise ValueError(
                "'transformer' and functions 'func'/'inverse_func' cannot both be set."
            )
        elif self.transformer is not None:
            self.transformer_ = clone(self.transformer)
        else:
            # `func` and `inverse_func` must be given together: exactly one of
            # them being None is an error.
            if (self.func is None) != (self.inverse_func is None):
                lacking_param, existing_param = (
                    ("func", "inverse_func")
                    if self.func is None
                    else ("inverse_func", "func")
                )
                raise ValueError(
                    f"When '{existing_param}' is provided, '{lacking_param}' must also"
                    f" be provided. If {lacking_param} is supposed to be the default,"
                    " you need to explicitly pass it the identity function."
                )
            self.transformer_ = FunctionTransformer(
                func=self.func,
                inverse_func=self.inverse_func,
                validate=True,
                check_inverse=self.check_inverse,
            )
            # We are transforming the target here and not the features, so pin
            # the output container of the internally created
            # FunctionTransformer to the default one instead of letting it
            # follow the global `transform_output` configuration: `fit` relies
            # on array attributes (`ndim`, `squeeze`) of the transformed
            # target.
            self.transformer_.set_output(transform="default")
        # XXX: sample_weight is not currently passed to the
        # transformer. However, if transformer starts using sample_weight, the
        # code should be modified accordingly. At the time to consider the
        # sample_prop feature, it is also a good use case to be considered.
        self.transformer_.fit(y)
        if self.check_inverse:
            # Only check the round trip on a subset of the samples (every
            # `n_samples // 10`-th row, i.e. roughly ten of them).
            idx_selected = slice(None, None, max(1, y.shape[0] // 10))
            y_sel = _safe_indexing(y, idx_selected)
            y_sel_t = self.transformer_.transform(y_sel)
            if not np.allclose(y_sel, self.transformer_.inverse_transform(y_sel_t)):
                warnings.warn(
                    (
                        "The provided functions or transformer are"
                        " not strictly inverse of each other. If"
                        " you are sure you want to proceed regardless"
                        ", set 'check_inverse=False'"
                    ),
                    UserWarning,
                )

    @_fit_context(
        # TransformedTargetRegressor.regressor/transformer are not validated yet.
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **fit_params):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        **fit_params : dict
            Parameters passed to the `fit` method of the underlying
            regressor.

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If `y` is None.
        """
        _raise_for_unsupported_routing(self, "fit", **fit_params)
        if y is None:
            raise ValueError(
                f"This {self.__class__.__name__} estimator "
                "requires y to be passed, but the target y is None."
            )
        y = check_array(
            y,
            input_name="y",
            accept_sparse=False,
            force_all_finite=True,
            ensure_2d=False,
            dtype="numeric",
            allow_nd=True,
        )

        # store the number of dimension of the target to predict an array of
        # similar shape at predict
        self._training_dim = y.ndim

        # transformers are designed to modify X which is 2d dimensional, we
        # need to modify y accordingly.
        y_2d = y.reshape(-1, 1) if y.ndim == 1 else y
        self._fit_transformer(y_2d)

        # transform y and convert back to 1d array if needed
        y_trans = self.transformer_.transform(y_2d)
        # FIXME: a FunctionTransformer can return a 1D array even when validate
        # is set to True. Therefore, we need to check the number of dimension
        # first.
        if y_trans.ndim == 2 and y_trans.shape[1] == 1:
            y_trans = y_trans.squeeze(axis=1)

        if self.regressor is None:
            # Imported lazily, only when the default regressor is needed.
            from ..linear_model import LinearRegression

            self.regressor_ = LinearRegression()
        else:
            self.regressor_ = clone(self.regressor)

        self.regressor_.fit(X, y_trans, **fit_params)

        # Mirror the feature names recorded by the fitted regressor, if any.
        if hasattr(self.regressor_, "feature_names_in_"):
            self.feature_names_in_ = self.regressor_.feature_names_in_

        return self

    def predict(self, X, **predict_params):
        """Predict using the base regressor, applying inverse.

        The regressor is used to predict and the `inverse_func` or
        `inverse_transform` is applied before returning the prediction.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Samples.

        **predict_params : dict of str -> object
            Parameters passed to the `predict` method of the underlying
            regressor.

        Returns
        -------
        y_hat : ndarray of shape (n_samples,) or (n_samples, n_outputs)
            Predicted values.
        """
        check_is_fitted(self)
        pred = self.regressor_.predict(X, **predict_params)
        # The transformer operates on 2-dimensional data, so a 1-dimensional
        # prediction is turned into a single column first.
        if pred.ndim == 1:
            pred_trans = self.transformer_.inverse_transform(pred.reshape(-1, 1))
        else:
            pred_trans = self.transformer_.inverse_transform(pred)
        # Give back a 1-dimensional output when the training target was
        # 1-dimensional.
        if (
            self._training_dim == 1
            and pred_trans.ndim == 2
            and pred_trans.shape[1] == 1
        ):
            pred_trans = pred_trans.squeeze(axis=1)

        return pred_trans

    def _more_tags(self):
        """Return estimator tags, taking `multioutput` from the regressor.

        When `regressor` is None, the tag is read from a default
        `LinearRegression` instance, which is what :meth:`fit` would use.
        """
        regressor = self.regressor
        if regressor is None:
            from ..linear_model import LinearRegression

            regressor = LinearRegression()

        return {
            "poor_score": True,
            "multioutput": _safe_tags(regressor, key="multioutput"),
        }

    @property
    def n_features_in_(self):
        """Number of features seen during :term:`fit`."""
        # For consistency with other estimators we raise an AttributeError so
        # that hasattr() returns False when the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            raise AttributeError(
                "{} object has no n_features_in_ attribute.".format(
                    self.__class__.__name__
                )
            ) from nfe

        return self.regressor_.n_features_in_
|